commit 775f457aff0c728a49aae8398564f4e6067b8368 Author: CI Date: Tue May 27 14:53:56 2025 +0000 Build branch move-detect-demultiplex-subworkflow with version move-detect-demultiplex-subworkflow (84bb47b) Build pipeline: viash-hub.demultiplex.move-detect-demultiplex-subworkflow-lcs4p Source commit: https://github.com/viash-hub/demultiplex/commit/84bb47bfb87cd0420f52ec97c15236311de4c94c Source message: add detect_demultiplexer subworkflow diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2c35f84 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +target +testData +test_resources + +# Nextflow related files +.nextflow +.nextflow.log* +work diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ce2220a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,204 @@ +# demultiplex v0.3.12 + +## New features + +* Add support for Nextflow versions version starting 25.xx.xx (PR #50). + +## Bug fixes + +* Allow FASTQ files for `Undetermined` to be empty (PR #50). + +# demultiplex v0.3.11 + +## New features + +* Output demultiplexer logs and metrics (PR #41). + +# demultiplex v0.3.10 + +## Minor changes + +* Moved the test resources to their new location (PR #37). + +# demultiplex v0.3.9 + +## Bug fixes + +* Fix defaults for output arguments in nextflow schema's. + +* Fix an issue where an integer being passed to a argument with `type: double` resulted in an error (PR #44). + +## Minor changes + +* Bump viash to 0.9.4, which adds support for nextflow versions starting major version 25.01 (PR #43 and #44). + +# demultiplex v0.3.8 + +## Bug fixes + +* Provide a proper error when a FASTQ file is empty after demultiplexing (PR #40). + +# demultiplex v0.3.7 + +## Minor updates + +* Ignore lines starting with '#' when parsing run information CSV (PR #39). + +# demultiplex v0.3.6 + +## Minor updates + +* Allow letter case variants for headers when looking for sample information in run information CSV (PR #38). + +# demultiplex v0.3.5 + +## Breaking changes + +* The `demultiplex` workflow now outputs a list of directories + for the `output_falco` argument (one for each barcode) instead of one directory + for the complete run. The output from the `runner` workflow remained + unchanged (PR #33). + +## Minor updates + +* In case Illumina data is detected in the input folder, check for the presence of the 'copyComplete.txt' file. + This check can be disabled using `--skip_copycomplete_check` (PR #34). + +# demultiplex v0.3.4 + +## Minor updates + +* Resource labels are now automatically included during build (PR #32). + +# demultiplex v0.3.3 + +## Breaking change + +- The `runner` defines the output differently now: + + - The last part of the `--input` path is expected to be the run ID and this run ID is used to create the output directory. + - If the input is `file.tar.gz` instead of a directory, the `file` part is used as the run ID. + +- The output structure is then as follows: + + ``` + $publish_dir//_demultiplex_/ + ``` + + For instance: + + ``` + $publish_dir + └── 200624_A00834_0183_BHMTFYDRXX + └── 20241217_051404_demultiplex_v1.2 + ├── run_information.csv + ├── fastq + │   ├── Sample1_S1_L001_R1_001.fastq.gz + │   ├── Sample23_S3_L001_R1_001.fastq.gz + │   ├── SampleA_S2_L001_R1_001.fastq.gz + │   ├── Undetermined_S0_L001_R1_001.fastq.gz + │   └── sampletest_S4_L001_R1_001.fastq.gz + └── qc + ├── fastqc + │   ├── Sample1_S1_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Sample1_S1_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Sample1_S1_L001_R1_001.fastq.gz_summary.txt + │   ├── Sample23_S3_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Sample23_S3_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Sample23_S3_L001_R1_001.fastq.gz_summary.txt + │   ├── SampleA_S2_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── SampleA_S2_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── SampleA_S2_L001_R1_001.fastq.gz_summary.txt + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_fastqc_report.html + │   ├── Undetermined_S0_L001_R1_001.fastq.gz_summary.txt + │   ├── sampletest_S4_L001_R1_001.fastq.gz_fastqc_data.txt + │   ├── sampletest_S4_L001_R1_001.fastq.gz_fastqc_report.html + │   └── sampletest_S4_L001_R1_001.fastq.gz_summary.txt + └── multiqc_report.html + + ``` + +- This logic can be avoided by providing the flag `--plain_output`. + +# Minor updates + +* Added `output_run_information` argument that copies the run information file to the output (PR #31). + +# demultiplex v0.3.2 + +# Bug fixes + +* Ignore empty CSV entries when parsing sample information (PR #29). + +# demultiplex v0.3.1 + +# Minor updates + +* Add `--run_information` and `--demultiplexer` arguments to `runner` workflow (PR #27). + +# Bug fixes + +* Fix detection of sample IDs from Illumina V2 sample sheets (PR #28). + +* Provide a clear error message when `--run_information` is provided but not `--demultiplexer` (PR #27). + +# demultiplex v0.3.0 + +## Major updates + +The outflow of the workflow has been refactored to be more flexible (PR #19). This is done by creating a wrapper workflow `runner` that wraps the native `demultiplex` workflow. The `runner` workflow is responsible for setting the output directory based on the input arguments: + +3 arguments exist for specifying the relative location of the 3 _outputs_ of the workflow: + +- `fastq_output`: The directory where the demultiplexed fastq files are stored. +- `falco_output`: the directory for the `fastqc`/`falco` reports. +- `multiqc_output`: The filename for the `multiqc` report. + +The target location path is determined by the following logic: + +- If no `id` is provided, the output directory is set to `$publish_dir`. +- If an `id` is explicitly set using Seqera Cloud or by adding `--id <>`, the output directory is set to `$publish_dir/`. + +The workflow has two optional flags to be used in combination with `--id`: + +- `--add_date_time`: rather than publishing the results under `$publish_dir`, this adds an additional layer `$publish_dir//`. This is useful when you want to keep track of multiple runs of the workflow (example: `240322_143020`). +- `--add_workflow_id`: adding this flag will add `_demultiplex_` to the output directory (example: `demultiplex_v0.2.0`). When starting the workflow from a non-release, the version will be set to `version_unkonwn`. + +The default structure in the output directory is: + +- Two sub-directories: + - `fastq` + - `qc` for the reports: + - `multiqc_report.html` + - `fastqc/` directory containing the different fastqc (falco) reports. + +The `$publish_dir` variable corresponds to the argument provided with `--publish-dir`. The `date-time-stamp` is generated by the workflow based on when it was launched and is thus guaranteed to be unique. + +# demultiplex v0.2.0 + +## Breaking changes + +* `demultiplex` workflow: renamed `sample_sheet` argument to `run_information` (PR #24) + +## New features + +* Add support for `bases2fastq` demultiplexer (PR #24) + +## Minor updates + +* Add resource labels to workflows (PR #21). + +# demultiplex v0.1.1 + +## Minor updates + +* Bump viash to 0.9.0 (PR #14). + +* `demultiplex` workflow: use `v0.2.0` release instead of `main` branch for `biobox` dependencies (PR #11). + +* Renamed `biobase` repository to `biobox` (PR #13 and PR #15). + +# demultiplex v0.1.0 + +Initial release diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0208fb4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Data Intuitive + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee95a20 --- /dev/null +++ b/README.md @@ -0,0 +1,219 @@ + + +# Demultiplex.vsh + +Demultiplex.vsh is a workflow for demultiplexing of raw sequencing data. +Currently data from Illumina and Element Biosciences sequencers are +supported. + +[![ViashHub](https://img.shields.io/badge/ViashHub-demultiplex-7a4baa.svg)](https://web.viash-hub.com/packages/demultiplex) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fdemultiplex-blue.svg)](https://github.com/viash-hub/demultiplex) +[![GitHub +License](https://img.shields.io/github/license/viash-hub/demultiplex.svg)](https://github.com/viash-hub/demultiplex/blob/main/LICENSE) +[![GitHub +Issues](https://img.shields.io/github/issues/viash-hub/demultiplex.svg)](https://github.com/viash-hub/demultiplex/issues) +[![Viash +version](https://img.shields.io/badge/Viash-v0.9.4-blue)](https://viash.io) + +## Introcuction + +This workflow is designed to demultiplex raw RNA-seq sequencing data +from Illumina and Element Biosciences sequencers. + +The workflow is built in a modular fashion, where most of the base +functionality is provided by components from +[`biobox`](https://www.viash-hub.com/packages/biobox/latest) +supplemented by custom base components and workflow components in this +package. Each of these components can be used independently as +stand-alone modules with a standardized interface. + +The full workflow can be run in two ways: + +1. Run the [main + workflow](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/demultiplex) + containing the main functionality. +2. Run the [(opinianated) + `runner`](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/runner) + where a number of choices (input/output structure and location) have + been made. + +## Workflow Overview + +The workflow executes the following steps: + +1. Unpacking the input data (when a TAR archive is provided) +2. Run `bclconvert` or `bases2fastq` +3. Run `falco` and convert Illumina InterOp information to csv +4. Run `multiqc` to generate a report + +## Example usage + +Two variants of the same workflow are provided, depending on the +flexibility in the ouput structure required: + +- The `runner` workflow provides a predifined output structure. It + requires the minimal amount of parameters to be provided, at the cost + of being less flexible. It is located at + `target/nextflow/runner/main.nf` +- The `demultiplex` workflow (`target/nextflow/demultiplex/main.nf`) + allows for more fine-grained tuning, but required more parameters to + be provided. + +### Test data + +We have provided test data at +`gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2` +(Illumina), but please feel free to bring your own. The URL of the test +data can be provided as-is to the workflow, or you can download +everything and specify a local path. + +The input data should follow the structure of either Illumina or Element +Biosciences sequencers. The workflow will automatically detect which +demultiplexer to use (`bclconvert` or `bases2fastq`) based on the +presence of either `SampleSheet.csv` or `RunParameters.xml` in the input +directory. Demultiplexer can also be set explicitly using the +`--demultiplexer` parameter. + +### Setup + +In order to use the workflows in this package, you’ll need to do the +following: + +- Install [nextflow](https://www.nextflow.io/docs/latest/install.html) +- Install a nextflow compatible executor. This workflow provides a + profile for [docker](https://docs.docker.com/get-started/). + +### Run from Viash Hub + +1. Open [Viash Hub](https://www.viash-hub.com) and browse to the + [demultiplex + component](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/demultiplex). + Press the ‘Launch’ button and follow the instructions. + +![](assets/demultiplex-launch-small.png) + +2. We will start an example run and set profile to `docker`. + +![](assets/demultiplex-launch-parameters-1.png) + +3. In the next step, we provide the paramters as follows and leave the + rest as defalut: + +- `input`: + `gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2` + +![](assets/demultiplex-launch-parameters-2.png) + +Press the ‘Launch’ button at the end to get the instructions on how to +run the workflow from the CLI. + +### Run using NF-Tower / Seqera Cloud + +It’s possible to run the workflow directly from [Seqera +Cloud](https://cloud.seqera.io). The necessary [Nextflow schema +file](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) +has been built and provided with the workflows in order to use the +form-based input. + +1. Select the option to run the workflow using Seqera Cloud. You will + need to create an API token for your account. Once this token is + filled in in the corresponding field, we will get the option to + select a ‘Workspace’ and a ‘Compute environment’. + +![](assets/demultiplex-launch-parameters-3.png) + +2. Provide the parameters similar to the previous step. + +3. In the next screen, pressing the ‘Launch’ button will actually start + the workflow on Seqera Cloud. A message is shown when the submit was + successful. + +![](assets/demultiplex-launch-parameters-4.png) + +### Setting up SCM + +In order to let nextflow use the viash-hub workflows, you need to setup +a [SCM](https://www.nextflow.io/docs/latest/git.html#git-configuration) +file. This can be done once by creating `$HOME/.nextflow/scm` and adding +the following: + + providers { + vsh { + platform = 'gitlab' + server = "packages.viash-hub.com" + } + } + +Alternatively, a custom location for the SCM file can be specified using +the `NXF_SCM_FILE` environment variable. + +You can check if everything is working by getting the `--help` for a +workflow: + +``` bash +nextflow run \ +vsh/demultiplex \ +-r v0.3.11 \ +--help +``` + +### Run from the CLI + +Running from the CLI directly without using Viash hub is possible as +well. The easiest is to use the integrated help functionality, for +instance using the following: + +``` bash + nextflow run vsh/demultiplex \ + -revision v0.3.11 \ + -main-script target/nextflow/workflows/runner/main.nf \ + --help +``` + +Having this project available locally, you can run the following +command: + +``` bash +nextflow run vsh/demultiplex \ +-r v0.3.11 \ +-main-script target/nextflow/runner/main.nf \ +--input "gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2" \ +--demultiplexer bclconvert \ +--skip_copycomplete_check \ +--publish_dir example_output/ \ +-profile docker \ +-c src/config/labels.config +``` + +### (Optional) Resource usage tuning + +Nextflow’s labels can be used to specify the amount of resources a +process can use. This workflow uses the following labels for CPU and +memory: + +- `verylowmem`, `lowmem`, `midmem`, `highmem` +- `verylowcpu`, `lowcpu`, `midcpu`, `highcpu` + +The defaults for these labels can be found at +`src/config/labels.config`. Nextflow checks that the specified resources +for a process do not exceed what is available on the machine and will +not start if it does. Create your own config file to tune the labels to +your needs, for example: + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 16 } + + withLabel: verylowmem { memory = 4.GB } + withLabel: lowmem { memory = 8.GB } + withLabel: midmem { memory = 8.GB } + withLabel: highmem { memory = 8.GB } + +When starting nextflow using the CLI, you can use `-c` to provide the +file to nextflow and overwrite the defaults. + +## Acknowledgements + +Developed in collaboration with Data Intuitive and Open Analytics. diff --git a/README.qmd b/README.qmd new file mode 100644 index 0000000..2eba38d --- /dev/null +++ b/README.qmd @@ -0,0 +1,191 @@ +--- +format: gfm +--- +```{r setup, include=FALSE} +project <- yaml::read_yaml("_viash.yaml") +license <- paste0(project$links$repository, "/blob/main/LICENSE") +``` + +# Demultiplex.vsh + +Demultiplex.vsh is a workflow for demultiplexing of raw sequencing data. Currently data from Illumina and Element Biosciences sequencers are supported. + +[![ViashHub](https://img.shields.io/badge/ViashHub-demultiplex-7a4baa.svg)](https://web.viash-hub.com/packages/demultiplex) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fdemultiplex-blue.svg)](https://github.com/viash-hub/demultiplex) +[![GitHub +License](https://img.shields.io/github/license/viash-hub/demultiplex.svg)](https://github.com/viash-hub/demultiplex/blob/main/LICENSE) +[![GitHub +Issues](https://img.shields.io/github/issues/viash-hub/demultiplex.svg)](https://github.com/viash-hub/demultiplex/issues) +[![Viash +version](https://img.shields.io/badge/Viash-v0.9.4-blue)](https://viash.io) + +## Introcuction +This workflow is designed to demultiplex raw RNA-seq sequencing data from Illumina and Element Biosciences sequencers. + +The workflow is built in a modular fashion, where most of the base functionality is provided by components from +[`biobox`](https://www.viash-hub.com/packages/biobox/latest) supplemented by custom base components and workflow components in this package. Each of these components can be used independently as stand-alone modules with a +standardized interface. + +The full workflow can be run in two ways: + +1. Run the [main + workflow](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/demultiplex) + containing the main functionality. +2. Run the [(opinianated) + `runner`](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/runner) + where a number of choices (input/output structure and location) have + been made. + +## Workflow Overview + +The workflow executes the following steps: + +1. Unpacking the input data (when a TAR archive is provided) +2. Run `bclconvert` or `bases2fastq` +3. Run `falco` and convert Illumina InterOp information to csv +4. Run `multiqc` to generate a report + +## Example usage + +Two variants of the same workflow are provided, depending on the flexibility in the ouput structure required: + +* The `runner` workflow provides a predifined output structure. It requires the minimal amount of parameters to be provided, at the cost of being less flexible. It is located at `target/nextflow/runner/main.nf` +* The `demultiplex` workflow (`target/nextflow/demultiplex/main.nf`) allows for more fine-grained tuning, but required more parameters to be provided. + +### Test data + +We have provided test data at `gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2` (Illumina), but please feel free to bring your own. The URL of the test data can be provided as-is to the workflow, or you can download everything and specify a local path. + +The input data should follow the structure of either Illumina or Element Biosciences sequencers. The workflow will automatically detect which demultiplexer to use (`bclconvert` or `bases2fastq`) based on the +presence of either `SampleSheet.csv` or `RunParameters.xml` in the input directory. Demultiplexer can also be set explicitly using the `--demultiplexer` parameter. + +### Setup + +In order to use the workflows in this package, you'll need to do the following: + +* Install [nextflow](https://www.nextflow.io/docs/latest/install.html) +* Install a nextflow compatible executor. This workflow provides a profile for [docker](https://docs.docker.com/get-started/). + +### Run from Viash Hub + +1. Open [Viash Hub](https://www.viash-hub.com) and browse to the [demultiplex +component](https://www.viash-hub.com/packages/demultiplex/v0.3.4/components/demultiplex). +Press the ‘Launch’ button and follow the instructions. + +![](assets/demultiplex-launch-small.png) + +2. We will start an example run and set profile to `docker`. + +![](assets/demultiplex-launch-parameters-1.png) + +3. In the next step, we provide the paramters as follows and leave the rest as defalut: + +- `input`: + `gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2` + +![](assets/demultiplex-launch-parameters-2.png) + +Press the ‘Launch’ button at the end to get the instructions on how to +run the workflow from the CLI. + +### Run using NF-Tower / Seqera Cloud + +It’s possible to run the workflow directly from [Seqera +Cloud](https://cloud.seqera.io). The necessary [Nextflow schema +file](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) +has been built and provided with the workflows in order to use the +form-based input. + +1. Select the option to run the workflow using Seqera Cloud. You +will need to create an API token for your account. Once this token is +filled in in the corresponding field, we will get the option to select +a ‘Workspace’ and a ‘Compute environment’. + +![](assets/demultiplex-launch-parameters-3.png) + +2. Provide the parameters similar to the previous step. + +3. In the next screen, pressing the ‘Launch’ button will actually start the +workflow on Seqera Cloud. A message is shown when the submit was +successful. + +![](assets/demultiplex-launch-parameters-4.png) + +### Setting up SCM + +In order to let nextflow use the viash-hub workflows, you need to setup a [SCM](https://www.nextflow.io/docs/latest/git.html#git-configuration) file. This can be done once by creating `$HOME/.nextflow/scm` and adding the following: +``` +providers { + vsh { + platform = 'gitlab' + server = "packages.viash-hub.com" + } +} +``` + +Alternatively, a custom location for the SCM file can be specified using the `NXF_SCM_FILE` environment variable. + +You can check if everything is working by getting the `--help` for a workflow: +```bash +nextflow run \ +vsh/demultiplex \ +-r v0.3.11 \ +--help +``` + +### Run from the CLI + +Running from the CLI directly without using Viash hub is possible as well. The +easiest is to use the integrated help functionality, for instance +using the following: + +``` bash + nextflow run vsh/demultiplex \ + -revision v0.3.11 \ + -main-script target/nextflow/workflows/runner/main.nf \ + --help +``` + +Having this project available locally, you can run the following command: + +```bash +nextflow run vsh/demultiplex \ +-r v0.3.11 \ +-main-script target/nextflow/runner/main.nf \ +--input "gs://viash-hub-resources/demultiplex/v3/demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2" \ +--demultiplexer bclconvert \ +--skip_copycomplete_check \ +--publish_dir example_output/ \ +-profile docker \ +-c src/config/labels.config +``` + +### (Optional) Resource usage tuning + +Nextflow's labels can be used to specify the amount of resources a process can use. This workflow uses the following labels for CPU and memory: + +* `verylowmem`, `lowmem`, `midmem`, `highmem` +* `verylowcpu`, `lowcpu`, `midcpu`, `highcpu` + +The defaults for these labels can be found at `src/config/labels.config`. Nextflow checks that the specified resources for a process do not exceed what is available on the machine and will not start if it does. Create your own config file to tune the labels to your needs, for example: + +``` +// Resource labels +withLabel: verylowcpu { cpus = 2 } +withLabel: lowcpu { cpus = 8 } +withLabel: midcpu { cpus = 16 } +withLabel: highcpu { cpus = 16 } + +withLabel: verylowmem { memory = 4.GB } +withLabel: lowmem { memory = 8.GB } +withLabel: midmem { memory = 8.GB } +withLabel: highmem { memory = 8.GB } +``` + +When starting nextflow using the CLI, you can use `-c` to provide the file to nextflow and overwrite the defaults. + +## Acknowledgements + +Developed in collaboration with Data Intuitive and Open Analytics. + + diff --git a/_viash.yaml b/_viash.yaml new file mode 100644 index 0000000..d868256 --- /dev/null +++ b/_viash.yaml @@ -0,0 +1,20 @@ +name: demultiplex +description: | + Demultiplexing pipeline +license: MIT +keywords: [bioinformatics, sequence, demultiplexing, pipeline] +links: + issue_tracker: https://github.com/viash-hub/demultiplex/issues + repository: https://github.com/viash-hub/demultiplex +info: + test_resources: + - path: gs://viash-hub-resources/demultiplex/v3 + dest: testData + +viash_version: 0.9.4 + +config_mods: | + .requirements.commands += ['ps'] + .runners[.type == 'nextflow'].directives.tag := '$id' + .resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'} + .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")' diff --git a/assets/demultiplex-launch-parameters-1.png b/assets/demultiplex-launch-parameters-1.png new file mode 100644 index 0000000..046e9d8 Binary files /dev/null and b/assets/demultiplex-launch-parameters-1.png differ diff --git a/assets/demultiplex-launch-parameters-2.png b/assets/demultiplex-launch-parameters-2.png new file mode 100644 index 0000000..b9d1521 Binary files /dev/null and b/assets/demultiplex-launch-parameters-2.png differ diff --git a/assets/demultiplex-launch-parameters-3.png b/assets/demultiplex-launch-parameters-3.png new file mode 100644 index 0000000..8385c7b Binary files /dev/null and b/assets/demultiplex-launch-parameters-3.png differ diff --git a/assets/demultiplex-launch-parameters-4.png b/assets/demultiplex-launch-parameters-4.png new file mode 100644 index 0000000..b15361e Binary files /dev/null and b/assets/demultiplex-launch-parameters-4.png differ diff --git a/assets/demultiplex-launch-small.png b/assets/demultiplex-launch-small.png new file mode 100644 index 0000000..eb98c7b Binary files /dev/null and b/assets/demultiplex-launch-small.png differ diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..fd40518 --- /dev/null +++ b/main.nf @@ -0,0 +1,3 @@ +workflow { + print("This is a dummy placeholder for pipeline execution. Please use the corresponding nf files for running pipelines.") +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..6536fa9 --- /dev/null +++ b/nextflow.config @@ -0,0 +1,12 @@ +manifest { + homePage = 'https://github.com/viash-hub/demultiplex' + description = 'Demultiplexing pipeline for sequencing data' + mainScript = 'target/nextflow/demultiplex/main.nf' +} + +process { + withName: publishStatesProc { + publishDir = [ enabled: false ] + } +} + diff --git a/src/config/labels.config b/src/config/labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/src/config/labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/src/dataflow/combine_samples/config.vsh.yaml b/src/dataflow/combine_samples/config.vsh.yaml new file mode 100644 index 0000000..d24a6fc --- /dev/null +++ b/src/dataflow/combine_samples/config.vsh.yaml @@ -0,0 +1,48 @@ +name: combine_samples +namespace: dataflow +description: Combine fastq files from across samples into one event with a list of fastq files per orientation. +argument_groups: + - name: Input arguments + arguments: + - name: "--id" + description: "ID of the new event" + type: string + required: true + - name: --forward_input + type: file + required: true + multiple: true + - name: --reverse_input + type: file + required: false + multiple: true + - name: "--falco_dir" + type: file + required: true + - name: Output arguments + arguments: + - name: --output_forward + type: file + direction: output + multiple: true + required: true + - name: --output_reverse + type: file + direction: output + multiple: true + required: false + - name: "--output_falco" + type: file + direction: output + required: true + multiple: true +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/dataflow/combine_samples/main.nf b/src/dataflow/combine_samples/main.nf new file mode 100644 index 0000000..d891536 --- /dev/null +++ b/src/dataflow/combine_samples/main.nf @@ -0,0 +1,30 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | map { id, state -> + def newEvent = [state.id, state + ["_meta": ["join_id": id]]] + newEvent + } + | groupTuple(by: 0, sort: "hash") + | map {run_id, states -> + // Gather the following state for all samples + def forward_fastqs = states.collect{it.forward_input}.flatten() + def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten() + def falco_dirs = states.collect{it.falco_dir} + + def resultState = [ + "output_forward": forward_fastqs, + "output_reverse": reverse_fastqs, + "output_falco": falco_dirs, + // The join ID is the same across all samples from the same run + "_meta": ["join_id": states[0]._meta.join_id] + ] + return [run_id, resultState] + } + + emit: + output_ch +} \ No newline at end of file diff --git a/src/dataflow/gather_fastqs_and_validate/config.vsh.yaml b/src/dataflow/gather_fastqs_and_validate/config.vsh.yaml new file mode 100644 index 0000000..ca8a430 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/config.vsh.yaml @@ -0,0 +1,49 @@ +name: gather_fastqs_and_validate +namespace: dataflow +description: | + From a directory containing fastq files, gather the files per sample + and validate according to the contents of the sample sheet. +argument_groups: + - name: Input arguments + arguments: + - name: --input + description: Directory containing .fastq files + type: file + required: true + - name: --sample_sheet + description: Sample sheet + type: file + required: true + - name: Output arguments + arguments: + - name: --fastq_forward + type: file + direction: output + required: true + multiple: true + - name: "--fastq_reverse" + type: file + direction: output + required: false + multiple: true +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf +test_resources: + - type: nextflow_script + path: test.nf + entrypoint: test_gather_and_validate + - type: nextflow_script + path: test.nf + entrypoint: test_undetermined_empty + - type: nextflow_script + path: test.nf + entrypoint: test_without_index + - path: test_data + +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/dataflow/gather_fastqs_and_validate/integration_tests.sh b/src/dataflow/gather_fastqs_and_validate/integration_tests.sh new file mode 100755 index 0000000..5135514 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/integration_tests.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +set -eo pipefail + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +viash ns build --setup cb -q gather_fastqs_and_validate + +nextflow run . \ + -main-script src/dataflow/gather_fastqs_and_validate/test.nf \ + -profile docker,no_publish,local \ + -entry test_gather_and_validate \ + -c src/config/labels.config \ + -resume + +nextflow run . \ + -main-script src/dataflow/gather_fastqs_and_validate/test.nf \ + -profile docker,no_publish,local \ + -entry test_undetermined_empty \ + -c src/config/labels.config \ + -resume + +nextflow run . \ + -main-script src/dataflow/gather_fastqs_and_validate/test.nf \ + -profile docker,no_publish,local \ + -entry test_without_index \ + -c src/config/labels.config \ + -resume + diff --git a/src/dataflow/gather_fastqs_and_validate/main.nf b/src/dataflow/gather_fastqs_and_validate/main.nf new file mode 100644 index 0000000..0dba601 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/main.nf @@ -0,0 +1,122 @@ +import java.util.zip.GZIPInputStream +import java.nio.file.Files +import java.io.BufferedInputStream + +def is_empty(file_to_check){ + /* + Checks if a file has content + */ + if (file_to_check.size() == 0) { + return true + } + def input_stream = Files.newInputStream(file_to_check) + def gzInputStream + try { + gzInputStream = new GZIPInputStream(new BufferedInputStream(input_stream)) + } catch (java.io.EOFException ex) { + // This is not a gzipfile... + return false + } + def read_one_byte = gzInputStream.read() + return read_one_byte == -1 +} + +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + // Gather input files from BCL convert output folder + | flatMap { id, state -> + println "Processing sample sheet: $state.sample_sheet" + def sample_sheet = state.sample_sheet + def start_parsing = false + def sample_id_column_index = null + def undetermined_sample_name = "Undetermined" + def samples = [undetermined_sample_name] + def original_id = id + + // Parse sample sheet for sample IDs + println "Processing run information file ${sample_sheet}" + csv_lines = sample_sheet.splitCsv(header: false, sep: ',') + csv_lines.any { csv_items -> + if (csv_items.isEmpty() || csv_items[0].startsWith("#")) { + // skip empty or commented line + return + } + def possible_header = csv_items[0] + def header = possible_header.find(/\[(.*)\]/){fullmatch, header_name -> header_name} + if (header) { + if (start_parsing) { + // Stop parsing when encountering the next header + println "Encountered next header '[${start_parsing}]', stopping parsing." + return true + } + // [Data], [BCLConvert_Data] for illumina + // [Samples] or sometimes [SAMPLES] for Element Biosciences + if (header.toLowerCase() in ["data", "samples", "bclconvert_data"]) { + println "Found header [${header}], start parsing." + start_parsing = true + return + } + } + if (start_parsing) { + if ( sample_id_column_index == null) { + println "Looking for sample name column." + sample_id_column_index = csv_items.findIndexValues{it == "Sample_ID" || it == "SampleName"} + assert (!sample_id_column_index.isEmpty()): + "Could not find column 'Sample_ID' (Illumina) or 'SampleName' " + + "(Element Biosciences) in run information! Found: ${sample_id_column_index}" + assert sample_id_column_index.size() == 1, "Expected run information file to contain " + + "a column 'Sample_ID' or 'SampleName', not both. Found: ${sample_id_column_index}" + sample_id_column_index = sample_id_column_index[0] + println "Found sample names column '${csv_items[sample_id_column_index]}'." + return + } + def candidate_sample_id = csv_items[sample_id_column_index] + if (candidate_sample_id?.trim()) { // Don't add empty csv entries. + samples += csv_items[sample_id_column_index] + } + } + // This return is important! (If 'true' is returned, the parsing stops.) + return + } + assert start_parsing: + "Sample information file does not contain [Data], [Samples] or [BCLConvert_Data] header!" + assert samples.size() > 1: + "Sample information file does not seem to contain any information about the samples!" + println "Finished processing run information file, found samples: ${samples}." + println "Looking for fastq files in ${state.input}." + def allfastqs = state.input.listFiles().findAll{it.isFile() && it.name ==~ /^.+\.fastq.gz$/} + println "Found ${allfastqs.size()} fastq files, matching them to the following samples: ${samples}." + processed_samples = samples.collect { sample_id -> + def forward_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R1_(\d+)\.fastq\.gz$/ + def reverse_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R2_(\d+)\.fastq\.gz$/ + // Sort is needed here because multiple lanes (_L00*_) might be present and they need to be in the same order in both lists + def forward_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ forward_regex}.sort() + def reverse_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ reverse_regex}.sort() + assert forward_fastq && !forward_fastq.isEmpty(): "No forward fastq files were found for sample ${sample_id}. " + + "All fastq files in directory: ${allfastqs.collect{it.name}}" + assert (reverse_fastq.isEmpty() || (forward_fastq.size() == reverse_fastq.size())): + "Expected equal number of forward and reverse fastq files for sample ${sample_id}. " + + "Found forward: ${forward_fastq} and reverse: ${reverse_fastq}." + println "Found ${forward_fastq.size()} forward and ${reverse_fastq.size()} reverse " + + "fastq files for sample ${sample_id}" + + assert sample_id == undetermined_sample_name || (forward_fastq.every{!is_empty(it)} && reverse_fastq.every{!is_empty(it)}): + "A fastq file for sample '${sample_id}' appears to be empty!" + def fastqs_state = [ + "fastq_forward": forward_fastq, + "fastq_reverse": reverse_fastq, + "_meta": [ "join_id": original_id ], + ] + [sample_id, fastqs_state] + } + println "Finished processing sample sheet." + return processed_samples + } + + emit: + output_ch +} \ No newline at end of file diff --git a/src/dataflow/gather_fastqs_and_validate/nextflow.config b/src/dataflow/gather_fastqs_and_validate/nextflow.config new file mode 100644 index 0000000..bb99592 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/nextflow.config @@ -0,0 +1,10 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +params { + rootDir = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString() +} + +// include common settings +includeConfig("${params.rootDir}/src/config/labels.config") diff --git a/src/dataflow/gather_fastqs_and_validate/test.nf b/src/dataflow/gather_fastqs_and_validate/test.nf new file mode 100644 index 0000000..42b213e --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/test.nf @@ -0,0 +1,86 @@ +nextflow.enable.dsl=2 + +include { gather_fastqs_and_validate } from params.rootDir + "/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf" + + +workflow test_gather_and_validate { + output_ch = Channel.fromList([ + [ + id: "run1", + input: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/fastqs", + sample_sheet: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet.csv", + ] + ]) + | map { state -> [state.id, state]} + | gather_fastqs_and_validate.run(toState: ["fastq_forward", "fastq_reverse"]) + + output_ch + | toSortedList{a, b -> a[0] <=> b[0]} + | view {"Output: $it"} + | map { + assert it.size() == 3: "Expected three fastq pairs" + def first_pair = it[0][1] + assert first_pair.fastq_forward.collect{it.name} == ["Undetermined_S1_R1_001.fastq.gz"] + assert first_pair.fastq_reverse.collect{it.name} == ["Undetermined_S1_R2_001.fastq.gz"] + def second_pair = it[1][1] + assert second_pair.fastq_forward.collect{it.name} == ["sample1_S1_L001_R1_001.fastq.gz", "sample1_S1_L002_R1_001.fastq.gz"] + assert second_pair.fastq_reverse.collect{it.name} == ["sample1_S1_L001_R2_001.fastq.gz", "sample1_S1_L002_R2_001.fastq.gz"] + def undetermined_pair = it[2][1] + assert undetermined_pair.fastq_forward.collect{it.name} == ["sample2_S1_L001_R1_001.fastq.gz"] + assert undetermined_pair.fastq_reverse.collect{it.name} == ["sample2_S1_L001_R2_001.fastq.gz"] + + } +} + + +workflow test_undetermined_empty { + output_ch = Channel.fromList([ + [ + id: "run1", + input: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty", + sample_sheet: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet.csv", + ] + ]) + | map { state -> [state.id, state]} + | gather_fastqs_and_validate.run(toState: ["fastq_forward", "fastq_reverse"]) + + output_ch + | toSortedList{a, b -> a[0] <=> b[0]} + | view {"Output: $it"} + | map { + assert it.size() == 3: "Expected three fastq pairs" + def first_pair = it[0][1] + assert first_pair.fastq_forward.collect{it.name} == ["Undetermined_S1_R1_001.fastq.gz"] + assert first_pair.fastq_reverse.collect{it.name} == ["Undetermined_S1_R2_001.fastq.gz"] + def second_pair = it[1][1] + assert second_pair.fastq_forward.collect{it.name} == ["sample1_S1_L001_R1_001.fastq.gz", "sample1_S1_L002_R1_001.fastq.gz"] + assert second_pair.fastq_reverse.collect{it.name} == ["sample1_S1_L001_R2_001.fastq.gz", "sample1_S1_L002_R2_001.fastq.gz"] + def undetermined_pair = it[2][1] + assert undetermined_pair.fastq_forward.collect{it.name} == ["sample2_S1_L001_R1_001.fastq.gz"] + assert undetermined_pair.fastq_reverse.collect{it.name} == ["sample2_S1_L001_R2_001.fastq.gz"] + + } +} + +workflow test_without_index { + output_ch = Channel.fromList([ + [ + id: "run1", + input: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty", + sample_sheet: params.rootDir + "/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet_no_index.csv", + ] + ]) + | map { state -> [state.id, state]} + | gather_fastqs_and_validate.run(toState: ["fastq_forward", "fastq_reverse"]) + + output_ch + | toSortedList{a, b -> a[0] <=> b[0]} + | view {"Output: $it"} + | map { + assert it.size() == 2: "Expected two fastq pairs" + def first_pair = it[0][1] + assert first_pair.fastq_forward.collect{it.name} == ["Undetermined_S1_R1_001.fastq.gz"] + assert first_pair.fastq_reverse.collect{it.name} == ["Undetermined_S1_R2_001.fastq.gz"] + + } +} \ No newline at end of file diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R1_001.fastq.gz new file mode 100644 index 0000000..e601d97 Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R2_001.fastq.gz new file mode 100644 index 0000000..1d4f9ae Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/Undetermined_S1_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R1_001.fastq.gz new file mode 100644 index 0000000..2a53b5a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R2_001.fastq.gz new file mode 100644 index 0000000..34d4c6a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L001_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R1_001.fastq.gz new file mode 100644 index 0000000..2a53b5a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R2_001.fastq.gz new file mode 100644 index 0000000..34d4c6a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample1_S1_L002_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R1_001.fastq.gz new file mode 100644 index 0000000..cda3a3b Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R2_001.fastq.gz new file mode 100644 index 0000000..765f332 Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs/sample2_S1_L001_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R1_001.fastq.gz new file mode 100644 index 0000000..4894f24 Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R2_001.fastq.gz new file mode 100644 index 0000000..cf9f0ef Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/Undetermined_S1_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R1_001.fastq.gz new file mode 100644 index 0000000..2a53b5a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R2_001.fastq.gz new file mode 100644 index 0000000..34d4c6a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L001_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R1_001.fastq.gz new file mode 100644 index 0000000..2a53b5a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R2_001.fastq.gz new file mode 100644 index 0000000..34d4c6a Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample1_S1_L002_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R1_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R1_001.fastq.gz new file mode 100644 index 0000000..cda3a3b Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R1_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R2_001.fastq.gz b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R2_001.fastq.gz new file mode 100644 index 0000000..765f332 Binary files /dev/null and b/src/dataflow/gather_fastqs_and_validate/test_data/fastqs_undetermined_empty/sample2_S1_L001_R2_001.fastq.gz differ diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet.csv b/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet.csv new file mode 100644 index 0000000..49df5b6 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet.csv @@ -0,0 +1,11 @@ +[foo],,,, +[somecontent],,,, +bar,lorem,,, +,,,, +# Comment + +[BCLConvert_Data],,,, +Sample_ID,Index,Index2,, +sample1,GTAGCCCTGT,GAGCATCTAT,, +sample2,TCGGCTCTAC,CCGATGGTCT,, +,,,, \ No newline at end of file diff --git a/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet_no_index.csv b/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet_no_index.csv new file mode 100644 index 0000000..2beb549 --- /dev/null +++ b/src/dataflow/gather_fastqs_and_validate/test_data/samplesheet_no_index.csv @@ -0,0 +1,10 @@ +[foo],,,, +[somecontent],,,, +bar,lorem,,, +,,,, +# Comment + +[BCLConvert_Data],,,, +Sample_ID,Index,Index2,, +sample1,,,, +,,,, \ No newline at end of file diff --git a/src/demultiplex/config.vsh.yaml b/src/demultiplex/config.vsh.yaml new file mode 100644 index 0000000..8aa6c6f --- /dev/null +++ b/src/demultiplex/config.vsh.yaml @@ -0,0 +1,112 @@ +name: demultiplex +description: Demultiplexing of raw sequencing data +argument_groups: + - name: Input arguments + arguments: + - name: --id + description: Unique identifier for the run + type: string + - name: --input + description: Directory containing raw sequencing data + type: file + required: true + - name: --run_information + description: | + CSV file containing sample information, which will be used as + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina) + or 'RunManifest.csv' (Element Biosciences). If not specified, + will try to autodetect the sample sheet in the input directory. + Requires --demultiplexer to be set. + type: file + required: false + - name: "--demultiplexer" + type: string + required: false + choices: ["bases2fastq", "bclconvert"] + description: | + Demultiplexer to use, choice depends on the provider + of the instrument that was used to generate the data. + When not using --sample_sheet, specifying this argument is not + required. + - name: Output arguments + arguments: + - name: --output + description: Directory to write fastq data to + type: file + direction: output + required: false + default: "$id/fastq" + - name: "--output_falco" + description: Directory to write falco output to + type: file + direction: output + required: false + multiple: true + default: "$id/qc/fastqc" + - name: "--output_multiqc" + description: Directory to write falco output to + type: file + direction: output + required: false + default: "$id/qc/multiqc_report.html" + - name: "--output_run_information" + type: file + direction: "output" + required: true + default: "$id/run_information.csv" + - name: "--demultiplexer_logs" + type: file + direction: output + required: true + default: "$id/demultiplexer_logs" + - name: "Other arguments" + arguments: + - name: --skip_copycomplete_check + type: boolean_true + description: | + Disable the check for the presence of a "CopyComplete.txt" file in input + directory in case of Illumina data. + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +test_resources: + - type: nextflow_script + path: test.nf + entrypoint: test_illumina + - type: nextflow_script + path: test.nf + entrypoint: test_bases2fastq + +dependencies: + - name: io/untar + repository: local + - name: dataflow/gather_fastqs_and_validate + repository: local + - name: io/interop_summary_to_csv + repository: local + - name: dataflow/combine_samples + repository: local + - name: bcl_convert + repository: bb + - name: bases2fastq + repository: bb + - name: falco + repository: bb + - name: multiqc + repository: bb + - name: detect_demultiplexer + repository: local +repositories: + - name: bb + type: vsh + repo: biobox + tag: v0.3.1 + +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/demultiplex/integration_tests.sh b/src/demultiplex/integration_tests.sh new file mode 100755 index 0000000..53f7283 --- /dev/null +++ b/src/demultiplex/integration_tests.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +viash ns build --setup cb -q demultiplex + +nextflow run . \ + -main-script src/demultiplex/test.nf \ + -profile docker,no_publish,local \ + -entry test_illumina \ + -c src/config/labels.config \ + --resources_test https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NovaSeq6000/ \ + -resume + + nextflow run . \ + -main-script src/demultiplex/test.nf \ + -profile docker,no_publish,local \ + -entry test_bases2fastq \ + -c src/config/labels.config \ + -resume \ No newline at end of file diff --git a/src/demultiplex/main.nf b/src/demultiplex/main.nf new file mode 100644 index 0000000..35e7f0b --- /dev/null +++ b/src/demultiplex/main.nf @@ -0,0 +1,182 @@ +workflow run_wf { + take: + input_ch + + main: + samples_ch = input_ch + + // untar input if needed + | untar.run( + directives: [label: ["lowmem", "lowcpu"]], + runIf: {id, state -> + def inputStr = state.input.toString() + inputStr.endsWith(".tar.gz") || \ + inputStr.endsWith(".tar") || \ + inputStr.endsWith(".tgz") ? true : false + }, + fromState: [ + "input": "input", + ], + toState: { id, result, state -> + state + ["input": result.output] + }, + ) + + // detect demultiplexer + | detect_demultiplexer.run( + fromState: [ + "input": "input", + "run_information": "run_information", + "demultiplexer": "demultiplexer", + ], + toState: { id, result, state -> + state + [ + "demultiplexer": result.demultiplexer_output, + "run_information": result.run_information_output + ] + } + ) + + | interop_summary_to_csv.run( + runIf: {id, state -> state.demultiplexer in ["bclconvert"]}, + directives: [label: ["lowmem", "verylowcpu"]], + fromState: [ + "input": "input", + ], + toState: [ + "interop_run_summary": "output_run_summary", + "interop_index_summary": "output_index_summary", + ] + ) + // run bcl_convert + | bcl_convert.run( + runIf: {id, state -> state.demultiplexer in ["bclconvert"]}, + directives: [label: ["highmem", "midcpu"]], + fromState: { id, state -> + [ + bcl_input_directory: state.input, + sample_sheet: state.run_information, + output_directory: state.output, + reports: state.demultiplexer_logs, + logs: state.demultiplexer_logs, + ] + }, + toState: {id, result, state -> + def toAdd = [ + "output_demultiplexer" : result.output_directory, + "run_id": id, + "demultiplexer_logs": result.reports, + ] + def newState = state + toAdd + return newState + } + ) + // run bases2fastq + | bases2fastq.run( + runIf: {id, state -> state.demultiplexer in ["bases2fastq"]}, + directives: [label: ["highmem", "midcpu"]], + fromState: { id, state -> + [ + "analysis_directory": state.input, + "run_manifest": state.run_information, + "output_directory": state.output, + "report": state.demultiplexer_logs + "/report.html", + "logs": state.demultiplexer_logs, + ] + }, + args: [ + "no_projects": true, // Do not put output files in a subfolder for project + //"split_lanes": true, + "legacy_fastq": true, // Illumina style output names + "group_fastq": true, // No subdir per sample + ], + toState: {id, result, state -> + def toAdd = [ + "output_demultiplexer" : result.output_directory, + "run_id": id, + "demultiplexer_logs": result.logs, + + ] + def newState = state + toAdd + return newState + } + ) + | gather_fastqs_and_validate.run( + fromState: [ + "input": "output_demultiplexer", + "sample_sheet": "run_information", + ], + toState: [ + "fastq_forward": "fastq_forward", + "fastq_reverse": "fastq_reverse", + ], + ) + + output_ch = samples_ch + | falco.run( + directives: [label: ["verylowcpu", "lowmem"]], + fromState: {id, state -> + [ + "input": [state.fastq_forward, state.fastq_reverse], + "outdir": "$id/qc/falco", + "summary_filename": null, + "report_filename": null, + "data_filename": null, + ] + }, + toState: { id, result, state -> + state + [ "output_falco" : result.outdir ] + } + ) + + | combine_samples.run( + fromState: { id, state -> + [ + "id": state.run_id, + "forward_input": state.fastq_forward, + "reverse_input": state.fastq_reverse, + "falco_dir": state.output_falco, + ] + }, + toState: [ + "forward_fastqs": "output_forward", + "reverse_fastqs": "output_reverse", + "output_falco": "output_falco", + ] + ) + + | multiqc.run( + directives: [label: ["midcpu", "midmem"]], + fromState: {id, state -> + def new_state = [ + "input": state.output_falco, + "output_report": state.output_multiqc, + "cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}' + ] + if (state.demultiplexer == "bclconvert") { + new_state["input"] += [ + state.interop_run_summary.getParent(), + state.interop_index_summary.getParent() + ] + } + return new_state + }, + toState: { id, result, state -> + state + [ "output_multiqc" : result.output_report ] + } + ) + + | setState( + [ + //"_meta": "_meta", + "output": "output_demultiplexer", + "output_falco": "output_falco", + "output_multiqc": "output_multiqc", + "output_run_information": "run_information", + "demultiplexer_logs": "demultiplexer_logs" + ] + ) + + emit: + output_ch +} diff --git a/src/demultiplex/nextflow.config b/src/demultiplex/nextflow.config new file mode 100644 index 0000000..7476563 --- /dev/null +++ b/src/demultiplex/nextflow.config @@ -0,0 +1,10 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +params { + rootDir = java.nio.file.Paths.get("$projectDir/../../").toAbsolutePath().normalize().toString() +} + +// include common settings +includeConfig("${params.rootDir}/src/config/labels.config") diff --git a/src/demultiplex/test.nf b/src/demultiplex/test.nf new file mode 100644 index 0000000..64a54bf --- /dev/null +++ b/src/demultiplex/test.nf @@ -0,0 +1,116 @@ +nextflow.enable.dsl=2 + +include { demultiplex } from params.rootDir + "/target/nextflow/demultiplex/main.nf" + +params.resources_test = params.rootDir + "/testData/" + +workflow test_illumina { + output_ch = Channel.fromList([ + [ + // sample_sheet: resources_test.resolve("bcl_convert_samplesheet.csv"), + // input: resources_test.resolve("iseq-DI/"), + //sample_sheet: "https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NovaSeq6000/SampleSheet.csv", + input: params.resources_test + "200624_A00834_0183_BHMTFYDRXX.tar.gz", + publish_dir: "output_dir/", + ] + ]) + | map { state -> [ "run", state ] } + | demultiplex.run( + toState: { id, output, state -> + output + [ orig_input: state.input ] } + ) + | view { output -> + assert output.size() == 2 : "outputs should contain two elements; [id, file]" + "Output: $output" + } + + event_count_ch = output_ch + | toSortedList() + | map { state -> + assert state.size() == 1 : "Expected one event in the output channel" + } + + assert_ch = output_ch + | map {id, state -> + assert state.output.isDirectory(): "Expected bclconvert output to be a directory" + state.output_falco.each{ + assert it.isDirectory(): "Expected falco output to be a directory" + } + assert state.output_multiqc.isFile(): "Expected multiQC output to be a file" + fastq_files = state.output.listFiles().collect{it.name} + assert ["Undetermined_S0_L001_R1_001.fastq.gz", "Sample23_S3_L001_R1_001.fastq.gz", + "sampletest_S4_L001_R1_001.fastq.gz", "Sample1_S1_L001_R1_001.fastq.gz", + "SampleA_S2_L001_R1_001.fastq.gz"].toSet() == fastq_files.toSet(): \ + "Output directory should contain the expected FASTQ files" + fastq_files.each{ + assert it.length() != 0: "Expected FASTQ file to not be empty" + } + assert state.output_run_information.isFile(): "Expected output run information to be a file" + expected_run_information = """[Header] + |Date,6/24/2020 + |Application,Illumina DRAGEN COVIDSeq Test Pipeline + |Instrument Type,NovaSeq6000 + |Assay,Illumina COVIDSeq Test + |Index Adapters,IDT-ILMN DNA-RNA UDP Indexes + |Chemistry,Amplicon + + |[Settings] + |AdapterRead1,CTGTCTCTTATACACATCT + + |[Data] + |Lane,Sample_ID,Sample_Type,Index_ID,Index,Index2 + |1,Sample1,PatientSample,UDP0001,GAACTGAGCG,TCGTGGAGCG + |1,SampleA,PatientSample,UDP0002,AGGTCAGATA,CTACAAGATA + |1,Sample23,PatientSample,UDP0003,CGTCTCATAT,TATAGTAGCT + |1,sampletest,PatientSample,UDP0004,ATTCCATAAG,TGCCTGGTGG + |""".stripMargin() + assert state.output_run_information.text.replaceAll("\r\n", "\n") == expected_run_information + + println "ID: ${id}" + println "State: ${state}" + + assert state.demultiplexer_logs.isDirectory(): + "Expected BCL Convert reports to be a directory" + + def logs_files = state.demultiplexer_logs.listFiles() + println "Logs files: ${logs_files}" + assert logs_files.size() > 0: "Expected BCL Convert logs dir to contain files" + + assert logs_files.find { it.name == "Demultiplex_Stats.csv" }: + "Expected to find BCL Convert Demultiplex_Stats.csv" + assert logs_files.find { it.name == "Logs" }: + "Expected to find BCL Convert Logs directory" + } +} + +workflow test_bases2fastq { + output_ch = Channel.fromList([ + [ + input: "http://element-public-data.s3.amazonaws.com/bases2fastq-share/bases2fastq-v2/20230404-bases2fastq-sim-151-151-9-9.tar.gz", + publish_dir: "output_dir/", + ] + ]) + | map { state -> [ "run", state ] } + | demultiplex.run( + toState: { id, output, state -> + output + [ orig_input: state.input ] } + ) + | view { output -> + assert output.size() == 2 : "outputs should contain two elements; [id, file]" + "Output: $output" + } + | map {id, state -> + assert state.output.isDirectory(): "Expected bases2fastq output to be a directory" + state.output_falco.each{assert it.isDirectory(): "Expected falco output to be a directory"} + assert state.output_multiqc.isFile(): "Expected multiQC output to be a file" + + def logs_files = state.demultiplexer_logs.listFiles() + println "Logs files: ${logs_files}" + assert logs_files.size() > 0: "Expected bases2fastq logs dir to contain files" + + assert logs_files.find { it.name == "report.html" } != null: + "Expected to find bases2fastq report.html" + assert logs_files.find { it.name == "info" }: + "Expected to find bases2fastq info directory" + } +} diff --git a/src/detect_demultiplexer/config.vsh.yaml b/src/detect_demultiplexer/config.vsh.yaml new file mode 100644 index 0000000..819cb90 --- /dev/null +++ b/src/detect_demultiplexer/config.vsh.yaml @@ -0,0 +1,52 @@ +name: detect_demultiplexer +description: | + Detects the demultiplexer used to generate the fastq files and runs the appropriate + demultiplexing workflow. +arguments: + - name: --id + description: Unique identifier for the run + type: string + - name: --input + description: Directory containing raw sequencing data + type: file + required: true + - name: --run_information + description: | + CSV file containing sample information, which will be used as + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina) + or 'RunManifest.csv' (Element Biosciences). If not specified, + will try to autodetect the sample sheet in the input directory. + Requires --demultiplexer to be set. + type: file + required: false + - name: "--demultiplexer" + type: string + required: false + choices: ["bases2fastq", "bclconvert"] + description: | + Demultiplexer to use, choice depends on the provider + of the instrument that was used to generate the data. + When not using --sample_sheet, specifying this argument is not + required. + + - name: --demultiplexer_output + description: Output demultiplexer + type: string + direction: output + required: false + - name: --run_information_output + description: Output run information + type: file + direction: output + required: false + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +runners: + - type: nextflow + +engines: + - type: native \ No newline at end of file diff --git a/src/detect_demultiplexer/main.nf b/src/detect_demultiplexer/main.nf new file mode 100644 index 0000000..3671c13 --- /dev/null +++ b/src/detect_demultiplexer/main.nf @@ -0,0 +1,96 @@ +workflow run_wf { + take: + input_ch // Channel with [id, state] pairs + + main: + output_ch = input_ch + + // Gather input files from folder + | map {id, state -> + def newState = [:] + println("Provided run information: ${state.run_information} and demultiplexer: ${state.demultiplexer}") + // No auto-detection of run information file (it is user provided), + // in this case the demultiplexer should also be specified. + assert (!state.run_information || state.demultiplexer): "When setting --run_information, " + + "you must also provide a demultiplexer" + + if (!state.run_information) { + println("Run information was not specified, auto-detecting...") + // The supported_platforms hashmap must be a 1-on-1 mapping + // Also, it's keys must be present in the 'choices' field + // for the 'run_information' argument in the viash config. + def supported_platforms = [ + "bclconvert": "SampleSheet.csv", // Illumina + "bases2fastq": "RunManifest.csv" // Element Biosciences + ] + def found_sample_information = supported_platforms.collectEntries{demultiplexer, filename -> + println("Checking if ${filename} can be found in input folder ${state.input}.") + def resolved_filename = state.input.resolve(filename) + if (!resolved_filename.isFile()) { + resolved_filename = null + } + println("Result after looking for run information for ${demultiplexer}: ${resolved_filename}.") + [demultiplexer, resolved_filename] + } + def demultiplexer = null + def run_information = null + found_sample_information.each{demultiplexer_candidate, file_path -> + if (file_path) { + // At this point, a candicate run information file was found. + assert !run_information: "Autodetection of run information " + + "(SampleSheet, RunManifest) failed: " + + "multiple candidate files found in input folder. " + + "Please specify one using --run_information." + run_information = file_path + demultiplexer = demultiplexer_candidate + } + } + + // When autodetecting, the run information should have been found + assert run_information: "No run information file (SampleSheet, RunManifest) " + + "found in input directory." + + // When autodetecting, the demultiplexer must be set if the run information was found + assert demultiplexer: "State error: the demultiplexer should have been autodetected. " + + "Please report this as a bug." + + // When autodetecting, the found demultiplexer must match + // with the demultiplexer that the user has provided (in case it was provided). + if (state.demultiplexer) { + assert state.demultiplexer == demultiplexer, + "Requested to use demultiplexer ${state.demultiplexer} " + + "but demultiplexer based on the autodetected run information " + "file ${run_information} seems to indicate that the demultiplexer " + "should be ${demultiplexer}. Either avoid specifying the demultiplexer " + "or override the autodetection of the run information by providing " + "the file." + } + println("Using run information ${run_information} and demultiplexer ${demultiplexer}") + // At this point, the autodetected state can override the user provided state. + newState = newState + [ + "run_information": run_information, + "demultiplexer": demultiplexer, + ] + } // end auto-detection logic + + if (newState.demultiplexer in ["bclconvert"]) { + // Do not add InterOp to state because we generate the summary csv's in the next + // step based on the run dir, not the InterOp dir. + def interop_dir = state.input.resolve("InterOp") + assert interop_dir.isDirectory(): "Expected InterOp directory to be present." + + def copycomplete_file = state.input.resolve("CopyComplete.txt") + assert (copycomplete_file.isFile() || state.skip_copycomplete_check): + "'CopyComplete.txt' file was not found!" + } + + def resultState = state + newState + [id, resultState] + } + + | setState(["demultiplexer_output": "demultiplexer", + "run_information_output": "run_information"]) + + emit: + output_ch +} \ No newline at end of file diff --git a/src/io/interop_summary_to_csv/config.vsh.yaml b/src/io/interop_summary_to_csv/config.vsh.yaml new file mode 100644 index 0000000..7ed3b8e --- /dev/null +++ b/src/io/interop_summary_to_csv/config.vsh.yaml @@ -0,0 +1,45 @@ +name: interop_summary_to_csv +namespace: io +argument_groups: + - name: Input arguments + arguments: + - name: --input + description: Sequencing run folder (*not* InterOp folder). + type: file + required: true + - name: Output arguments + arguments: + - name: --output_run_summary + type: file + direction: output + required: true + - name: --output_index_summary + type: file + direction: output + required: true +requirements: + commands: ["summary", "index-summary"] +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: /testData/iseq-DI +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: + - procps + - wget + - type: docker + run: | + wget https://github.com/Illumina/interop/releases/download/v1.3.1/interop-1.3.1-Linux-GNU.tar.gz -O /tmp/interop.tar.gz && \ + tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \ + mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/ + +runners: + - type: executable + - type: nextflow diff --git a/src/io/interop_summary_to_csv/script.sh b/src/io/interop_summary_to_csv/script.sh new file mode 100644 index 0000000..e4a37fb --- /dev/null +++ b/src/io/interop_summary_to_csv/script.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -eo pipefail + +if [ ! -d "$par_input" ]; then + echo "Input directory does not exist or is not a directory" + exit 1 +fi +$(which summary) --csv=1 "$par_input" 1> "$par_output_run_summary" +$(which index-summary) --csv=1 "$par_input" 1> "$par_output_index_summary" diff --git a/src/io/interop_summary_to_csv/test.sh b/src/io/interop_summary_to_csv/test.sh new file mode 100644 index 0000000..d0ba9ce --- /dev/null +++ b/src/io/interop_summary_to_csv/test.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -eo pipefail + +# create tempdir +echo ">>> Creating temporary test directory." +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT +echo ">>> Created temporary directory '$TMPDIR'." + +echo ">>> Run simple execution" +./$meta_functionality_name \ + --input "$meta_resources_dir/iseq-DI" \ + --output_run_summary "$TMPDIR/run_summary.csv" \ + --output_index_summary "$TMPDIR/index_summary.csv" \ No newline at end of file diff --git a/src/io/publish/code.sh b/src/io/publish/code.sh new file mode 100755 index 0000000..b4a8914 --- /dev/null +++ b/src/io/publish/code.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +set -eo pipefail + +declare -A input_output_mapping=(["par_input"]="par_output" + ["par_input_multiqc"]="par_output_multiqc" + ["par_input_run_information"]="par_output_run_information" + ["par_input_demultiplexer_logs"]="par_output_demultiplexer_logs" + ) + +for input_argument_name in "${!input_output_mapping[@]}" +do + input_location="${!input_argument_name}" + output_argument_name="${input_output_mapping[$input_argument_name]}" + output_location="${!output_argument_name}" + echo "Publishing $input_location -> $output_location" + + echo "Creating directory if it does not exist." + mkdir -p $(dirname "$output_location") && echo "Containing directory $output_location created" + + echo "Copying files..." + cp -rL "$input_location" "$output_location" + + echo "Output files for $output_location:" + ls "$output_location" +done + +echo "Grouping output from $par_input_falco into $par_output_falco" +mkdir -p "$par_output_falco" +IFS=";" read -ra falco_inputs <<< $par_input_falco +for falco_dir in "${falco_inputs[@]}"; do + echo "Copying contents of $falco_dir" + find -H -D exec "$falco_dir" -type f -maxdepth 1 -exec cp -t "$par_output_falco" {} + +done \ No newline at end of file diff --git a/src/io/publish/config.vsh.yaml b/src/io/publish/config.vsh.yaml new file mode 100644 index 0000000..c41c257 --- /dev/null +++ b/src/io/publish/config.vsh.yaml @@ -0,0 +1,64 @@ +name: "publish" +namespace: "io" +description: "Publish the processed results of the run" +argument_groups: + - name: Input arguments + arguments: + - name: --input + description: Directory to write fastq data to + type: file + required: true + - name: "--input_falco" + description: Directory to write falco output to + type: file + required: true + multiple: true + - name: "--input_multiqc" + description: Location where to write the MultiQC report to. + type: file + required: true + - name: "--input_run_information" + description: "Location where to write the run information to." + type: file + required: true + - name: "--input_demultiplexer_logs" + type: file + required: true + - name: Output arguments + arguments: + - name: --output + type: file + direction: output + default: "fastq" + - name: --output_falco + type: file + direction: output + default: "qc/fastqc" + - name: --output_multiqc + type: file + direction: output + default: "qc/multiqc_report.html" + - name: --output_run_information + type: file + direction: output + default: run_information.csv + - name: "--output_demultiplexer_logs" + type: file + direction: output + default: "demultiplexer_logs" + +resources: + - type: bash_script + path: ./code.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: + - procps + +runners: + - type: executable + - type: nextflow diff --git a/src/io/untar/config.vsh.yaml b/src/io/untar/config.vsh.yaml new file mode 100644 index 0000000..857c5b1 --- /dev/null +++ b/src/io/untar/config.vsh.yaml @@ -0,0 +1,44 @@ +name: untar +namespace: io +description: | + Unpack a .tar file. When the contents of the .tar file is just a single directory, + put the contents of the directory into the output folder instead of that directory. +argument_groups: + - name: Input arguments + arguments: + - name: --input + description: Tarball file to be unpacked. + type: file + required: true + - name: Output arguments + arguments: + - name: --output + description: Directory to write the contents of the .tar file to. + type: file + direction: output + required: true + - name: "Other arguments" + arguments: + - name: "--exclude" + alternatives: ["-e"] + type: string + description: Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted. + example: "docs/figures" + required: false +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: + - procps + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/io/untar/script.sh b/src/io/untar/script.sh new file mode 100644 index 0000000..253ff47 --- /dev/null +++ b/src/io/untar/script.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -eo pipefail + +extra_args=() + +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Check if tarball contains 1 top-level directory. If so, extract the contents of the +# directory to the output directory instead of the directory itself. +echo "Directory contents:" +tar -taf "${par_input}" > "$TMPDIR/tar_contents.txt" +cat "$TMPDIR/tar_contents.txt" + +printf "Checking if tarball contains only a single top-level directory: " +if [[ $(grep -o -E '^[./]*[^/]+/$' "$TMPDIR/tar_contents.txt" | uniq | wc -l) -eq 1 ]]; then + echo "It does." + echo "Extracting the contents of the top-level directory to the output directory instead of the directory itself." + # The directory can be both of the format './' (or ././) or just + # Adjust the number of stripped components accordingly by looking for './' at the beginning of the file. + starting_relative=$(grep -oP -m 1 '^(./)*' "$TMPDIR/tar_contents.txt" | tr -d '\n' | wc -c) + n_strips=$(( ($starting_relative / 2)+1 )) + extra_args+=("--strip-components=$n_strips") +else + echo "It does not." +fi + +if [ "$par_exclude" != "" ]; then + echo "Exclusion of files with wildcard '$par_exclude' requested." + extra_args+=("--exclude=$par_exclude") +fi + +echo "Starting extraction of tarball '$par_input' to output directory '$par_output'." +mkdir -p "$par_output" +echo "executing 'tar --no-same-owner --no-same-permissions --directory=$par_output ${extra_args[@]} -xavf $par_input'" +tar --no-same-owner --no-same-permissions --directory="$par_output" ${extra_args[@]} -xavf "$par_input" + diff --git a/src/io/untar/test.sh b/src/io/untar/test.sh new file mode 100644 index 0000000..c49a576 --- /dev/null +++ b/src/io/untar/test.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash + +set -eo pipefail + +# create tempdir +echo ">>> Creating temporary test directory." +TMPDIR=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT +echo ">>> Created temporary directory '$TMPDIR'." + +INPUT_FILE="$TMPDIR/test_file.txt" +echo ">>> Creating test input file at '$TMPDIR/test_file.txt'." +echo "foo" > "$INPUT_FILE" +echo ">>> Created '$INPUT_FILE'." + +echo ">>> Creating tar.gz from '$INPUT_FILE'." +TARFILE="${INPUT_FILE}.tar.gz" +tar -C "$TMPDIR" -czvf ${INPUT_FILE}.tar.gz $(basename "$INPUT_FILE") +[[ ! -f "$TARFILE" ]] && echo ">>> Test setup failed: could not create tarfile." && exit 1 +echo ">>> '$TARFILE' created." + +echo ">>> Check whether tar.gz can be extracted" +echo ">>> Creating temporary output directory for test 1." +OUTPUT_DIR_1="$TMPDIR/output_test_1/" +mkdir "$OUTPUT_DIR_1" + +echo ">>> Extracting '$TARFILE' to '$OUTPUT_DIR_1'". +./$meta_functionality_name \ + --input "$TARFILE" \ + --output "$OUTPUT_DIR_1" + +echo ">>> Check whether extracted file exists" +[[ ! -f "$OUTPUT_DIR_1/test_file.txt" ]] && echo "Output file could not be found. Output directory contents: " && ls "$OUTPUT_DIR_1" && exit 1 + +echo ">>> Creating temporary output directory for test 2." +OUTPUT_DIR_2="$TMPDIR/output_test_2/" +mkdir "$OUTPUT_DIR_2" + +echo ">>> Extracting '$TARFILE' to '$OUTPUT_DIR_2', excluding '$test_file.txt'". +./$meta_functionality_name \ + --input "$TARFILE" \ + --output "$OUTPUT_DIR_2" \ + --exclude 'test_file.txt' + +echo ">>> Check whether excluded file was not extracted" +[[ -f "$OUTPUT_DIR_2/test_file.txt" ]] && echo "File should have been excluded! Output directory contents:" && ls "$OUTPUT_DIR_2" && exit 1 + +echo ">>> Creating test tarball containing only 1 top-level directory." +mkdir "$TMPDIR/input_test_3/" +cp "$INPUT_FILE" "$TMPDIR/input_test_3/" +tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_3.tar.gz" $(basename "$TMPDIR/input_test_3") +TARFILE_3="$TMPDIR/input_test_3.tar.gz" + +echo ">>> Creating temporary output directory for test 3." +OUTPUT_DIR_3="$TMPDIR/output_test_3/" +mkdir "$OUTPUT_DIR_3" + +echo "Extracting '$TARFILE_3' to '$OUTPUT_DIR_3'". +./$meta_functionality_name \ + --input "$TARFILE_3" \ + --output "$OUTPUT_DIR_3" + +echo ">>> Check whether extracted file exists" +[[ ! -f "$OUTPUT_DIR_3/test_file.txt" ]] && echo "Output file could not be found!" && exit 1 + +echo ">>> Check for tar archive that contains a single directory starting with './'." +mkdir "$TMPDIR/input_test_4/" +cp "$INPUT_FILE" "$TMPDIR/input_test_4/" + +pushd "$TMPDIR/" +trap popd ERR +tar -czvf "$TMPDIR/input_test_4.tar.gz" ./input_test_4 +popd +trap - ERR + +OUTPUT_DIR_4="$TMPDIR/output_test_4/" +echo "Extracting '$TMPDIR/input_test_4.tar.gz' to '$OUTPUT_DIR_4'". +./$meta_functionality_name \ + --input "$TMPDIR/input_test_4.tar.gz" \ + --output "$OUTPUT_DIR_4" + +echo ">>> Check whether extracted file exists" +[[ ! -f "$OUTPUT_DIR_4/test_file.txt" ]] && echo "Output file could not be found!" && exit 1 + +echo ">>> Creating test tarball containing only 1 top-level directory, but it is nested." +mkdir -p "$TMPDIR/input_test_5/nested/" +cp "$INPUT_FILE" "$TMPDIR/input_test_5/nested/" +tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_5.tar.gz" $(basename "$TMPDIR/input_test_5") +TARFILE_5="$TMPDIR/input_test_5.tar.gz" + +echo ">>> Creating temporary output directory for test 5." +OUTPUT_DIR_5="$TMPDIR/output_test_5/" +mkdir "$OUTPUT_DIR_5" + +echo "Extracting '$TARFILE_5' to '$OUTPUT_DIR_5'". +./$meta_functionality_name \ + --input "$TARFILE_5" \ + --output "$OUTPUT_DIR_5" + +echo ">>> Check whether extracted file exists" +[[ ! -f "$OUTPUT_DIR_5/nested/test_file.txt" ]] && echo "Output file could not be found!" && exit 1 + +echo ">>> Creating test tarball containing two top-level directories." +mkdir -p "$TMPDIR/input_test_6/number_1/" +mkdir "$TMPDIR/input_test_6/number_2/" +cp "$INPUT_FILE" "$TMPDIR/input_test_6/number_1/" +tar -C "$TMPDIR" -czvf "$TMPDIR/input_test_6.tar.gz" $(basename "$TMPDIR/input_test_6") +TARFILE_6="$TMPDIR/input_test_6.tar.gz" + +echo ">>> Creating temporary output directory for test 6." +OUTPUT_DIR_6="$TMPDIR/output_test_6/" +mkdir "$OUTPUT_DIR_6" + +echo "Extracting '$TARFILE_6' to '$OUTPUT_DIR_6'". +./$meta_functionality_name \ + --input "$TARFILE_6" \ + --output "$OUTPUT_DIR_6" + +echo ">>> Check whether extracted file exists" +[[ ! -f "$OUTPUT_DIR_6/number_1/test_file.txt" ]] && echo "Output file could not be found!" && exit 1 +[[ ! -d "$OUTPUT_DIR_6/number_2" ]] && echo "Output directory could not be found!" && exit 1 + +echo ">>> Test finished successfully" diff --git a/src/runner/config.vsh.yaml b/src/runner/config.vsh.yaml new file mode 100644 index 0000000..9f3cd37 --- /dev/null +++ b/src/runner/config.vsh.yaml @@ -0,0 +1,78 @@ +name: runner +description: Runner for demultiplexing of raw sequencing data +argument_groups: + - name: Input arguments + arguments: + - name: --input + description: | + Base directory of the canonical form `s3://///`. + A tarball (tar.gz, .tgz, .tar) containing run information can be provided in which + case the RunID is set to the name of the tarball without the extension. + type: file + required: true + - name: --run_information + description: | + CSV file containing sample information, which will be used as + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina) + or 'RunManifest.csv' (Element Biosciences). If not specified, + will try to autodetect the sample sheet in the input directory. + Requires --demultiplexer to be set. + type: file + required: false + - name: "--demultiplexer" + type: string + required: false + choices: ["bases2fastq", "bclconvert"] + description: | + Demultiplexer to use, choice depends on the provider + of the instrument that was used to generate the data. + When not using --sample_sheet, specifying this argument is not + required. + - name: Annotation flags + arguments: + - name: --plain_output + description: | + Flag to indicate that the output should be stored directly under $publish_dir rather than + under a subdirectory structure runID/_demultiplex_/. + type: boolean_true + - name: Output arguments + arguments: + - name: --fastq_output + type: file + direction: output + default: "fastq" + - name: --falco_output + type: file + direction: output + default: "qc/fastqc" + - name: --multiqc_output + type: file + direction: output + default: "qc/multiqc_report.html" + - name: "--demultiplexer_logs" + type: file + direction: output + default: "demultiplexer_logs" + - name: "Other arguments" + arguments: + - name: --skip_copycomplete_check + type: boolean_true + description: | + Disable the check for the presence of a "CopyComplete.txt" file in input + directory in case of Illumina data. +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf + +dependencies: + - name: demultiplex + repository: local + - name: io/publish + repository: local + +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/runner/main.nf b/src/runner/main.nf new file mode 100644 index 0000000..9ee0b0e --- /dev/null +++ b/src/runner/main.nf @@ -0,0 +1,93 @@ +def date = new Date().format('yyyyMMdd_hhmmss') + +def viash_config = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString() + "/_viash.yaml" +def version = get_version(viash_config) + +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + // Extract the ID from the input. + // If the input is a tarball, strip the suffix. + | map{ id, state -> + def id_with_suffix = state.input.getFileName().toString() + [ + id, + state + [ run_id: id_with_suffix - ~/\.(tar.gz|tgz|tar)$/ ] + ] + } + | demultiplex.run( + fromState: { id, state -> + def state_to_pass = [ + "input": state.input, + "run_information": state.run_information, + "demultiplexer": state.demultiplexer, + "skip_copycomplete_check": state.skip_copycomplete_check, + "output": "$id/fastq", + "output_falco": "$id/qc/fastqc", + "output_multiqc": "$id/qc/multiqc_report.html", + "demultiplexer_logs": "$id/demultiplexer_logs", + ] + if (state.run_information) { + state_to_pass += ["output_run_information": state.run_information.getName()] + } + state_to_pass + }, + toState: { id, result, state -> + state + result + }, + ) + | publish.run( + fromState: { id, state -> + println(state.plain_output) + def id1 = (state.plain_output) ? id : "${state.run_id}/${date}" + def id2 = (state.plain_output) ? id : "${id1}_demultiplex_${version}" + + def fastq_output_1 = (id2 == "run") ? state.fastq_output : "${id2}/" + state.fastq_output + def falco_output_1 = (id2 == "run") ? state.falco_output : "${id2}/" + state.falco_output + def multiqc_output_1 = (id2 == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output + def run_information_output_1 = (id2 == "run") ? "${state.output_run_information.getName()}" : "${id2}/${state.output_run_information.getName()}" + def demultiplexer_logs_output = (id2 == "run") ? state.demultiplexer_logs : "${id2}/${state.demultiplexer_logs.getName()}" + + if (id2 == "run") { + println("Publising to ${params.publish_dir}") + } else { + println("Publising to ${params.publish_dir}/${id2}") + } + + [ + input: state.output, + input_falco: state.output_falco, + input_multiqc: state.output_multiqc, + input_run_information: state.output_run_information, + input_demultiplexer_logs: state.demultiplexer_logs, + output: fastq_output_1, + output_falco: falco_output_1, + output_multiqc: multiqc_output_1, + output_run_information: run_information_output_1, + output_demultiplexer_logs: demultiplexer_logs_output, + ] + }, + toState: { id, result, state -> [:] }, + directives: [ + publishDir: [ + path: "${params.publish_dir}", + overwrite: false, + mode: "copy" + ] + ] + ) + + emit: + output_ch +} + +def get_version(inputFile) { + def yamlSlurper = new groovy.yaml.YamlSlurper() + def loaded_viash_config = yamlSlurper.parse(file(inputFile)) + def version = (loaded_viash_config.version) ? loaded_viash_config.version : "unknown_version" + println("Version to be used: ${version}") + return version +} diff --git a/src/runner/nextflow.config b/src/runner/nextflow.config new file mode 100644 index 0000000..20297b8 --- /dev/null +++ b/src/runner/nextflow.config @@ -0,0 +1,12 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +process { + withName: publishStatesProc { + publishDir = [ enabled: false ] + } +} + +// include common settings +includeConfig("${params.rootDir}/src/config/labels.config") diff --git a/target/.build.yaml b/target/.build.yaml new file mode 100644 index 0000000..e69de29 diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/.config.vsh.yaml new file mode 100644 index 0000000..5cfe0a7 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/.config.vsh.yaml @@ -0,0 +1,435 @@ +name: "bases2fastq" +version: "v0.3.1" +authors: +- name: "Dries Schaumont" + roles: + - "author" + - "maintainer" + info: + links: + email: "dries@data-intuitive.com" + github: "DriesSchaumont" + orcid: "0000-0002-4389-0440" + linkedin: "dries-schaumont" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--analysis_directory" + description: "Location of analysis directory" + info: null + example: + - "input" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_manifest" + alternatives: + - "-r" + description: "Location of run manifest to use instead of default RunManifest.csv\ + \ found in analysis directory" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--output_directory" + alternatives: + - "-o" + description: "Location to save output fastqs" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--report" + description: "Output location for the HTML report" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--logs" + description: "Directory containing log files" + info: null + example: + - "logs_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Arguments" + arguments: + - type: "string" + name: "--chemistry_version" + description: "Run parameters override, chemistry version." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--demux_only" + alternatives: + - "-d" + description: "Generate demux files and indexing stats without generating FASTQ\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--detect_adapters" + description: "Detect adapters sequences, overriding any sequences present in run\ + \ manifest.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--error_on_missing" + description: "Terminate execution for a missing file (by default, missing files\ + \ are\nskipped and execution continues). Also set by --strict.\n" + info: null + direction: "input" + - type: "string" + name: "--exclude_tile" + alternatives: + - "-e" + description: "Regex matching tile names to exclude. This flag can be specified\ + \ multiple times. (e.g. L1.*C0[23]S.)\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--filter_mask" + description: "Run parameters override, custom pass filter mask.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--flowcell_id" + description: "Run parameters override, flowcell ID.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--force_index_orientation" + description: "Do not attempt to find orientation for I1/I2 reads (reverse complement).\n\ + Use orientation given in run manifest.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--group_fastq" + description: "Group all FASTQ/stats/metrics for a project are in the project folder.\n" + info: null + direction: "input" + - type: "integer" + name: "--i1_cycles" + description: "Run parameters override, I1 cycles.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--i2_cycles" + description: "Run parameters override, I2 cycles\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--include_tile" + alternatives: + - "-i" + description: "Regex matching tile names to include. This flag\ncan be specified\ + \ multiple times. (e.g. L1.*C0[23]S.)\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--kit_configuration" + description: "Run parameters override, kit configuration.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--legacy_fastq" + description: "Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)\n" + info: null + direction: "input" + - type: "string" + name: "--log_level" + alternatives: + - "-l" + description: "Severity level for logging.\n" + info: null + example: + - "INFO" + required: false + choices: + - "DEBUG" + - "INFO" + - "WARNING" + - "ERROR" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--no_error_on_invalid" + description: "Skip invalid files and continue execution. Overridden by --strict\ + \ options\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_projects" + description: "Disable project directories\n" + info: null + direction: "input" + - type: "integer" + name: "--num_unassigned" + description: "Max Number of unassigned sequences to report.\n" + info: null + example: + - 30 + required: false + min: 0 + max: 1000 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--preparation_workflow" + description: "Run parameters override, preparation workflow. \n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--qc_only" + description: "Quickly generate run stats for single tile without generating FASTQ.\n\ + Use --include_tile/--exclude_tile to define custom tile set.\n" + info: null + direction: "input" + - type: "integer" + name: "--r1_cycles" + description: "Run parameters override, R1 cycles.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--r2_cycles" + description: "Run parameters override, R2 cycles.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--split_lanes" + description: "Split FASTQ files by lane.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--strict" + description: "In strict mode any invalid or missing input file will terminate\ + \ execution \n(overrides no_error_on_invalid and sets --error_on_missing)\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences\ + \ instruments and converts base calls into FASTQ files.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "demultiplex" +- "fastq" +- "demux" +- "Element Biosciences" +license: "Proprietairy" +links: + repository: "https://github.com/viash-hub/biobox" + documentation: "https://docs.elembio.io/docs/bases2fastq/introduction/" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "elembio/bases2fastq:2.1.0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "tree" + interactive: false + - type: "docker" + run: + - "echo \"bases2fastq: $(bases2fastq --version | cut -d' ' -f3)\" > /var/software_versions.txt\n" + test_setup: + - type: "apt" + packages: + - "curl" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bases2fastq/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bases2fastq" + executable: "target/nextflow/bases2fastq/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/main.nf new file mode 100644 index 0000000..52f135b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/main.nf @@ -0,0 +1,4217 @@ +// bases2fastq v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dries Schaumont (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bases2fastq", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Dries Schaumont", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "dries@data-intuitive.com", + "github" : "DriesSchaumont", + "orcid" : "0000-0002-4389-0440", + "linkedin" : "dries-schaumont" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--analysis_directory", + "description" : "Location of analysis directory", + "example" : [ + "input" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_manifest", + "alternatives" : [ + "-r" + ], + "description" : "Location of run manifest to use instead of default RunManifest.csv found in analysis directory", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--output_directory", + "alternatives" : [ + "-o" + ], + "description" : "Location to save output fastqs", + "example" : [ + "fastq_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--report", + "description" : "Output location for the HTML report", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--logs", + "description" : "Directory containing log files", + "example" : [ + "logs_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--chemistry_version", + "description" : "Run parameters override, chemistry version.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--demux_only", + "alternatives" : [ + "-d" + ], + "description" : "Generate demux files and indexing stats without generating FASTQ\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--detect_adapters", + "description" : "Detect adapters sequences, overriding any sequences present in run manifest.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--error_on_missing", + "description" : "Terminate execution for a missing file (by default, missing files are\nskipped and execution continues). Also set by --strict.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--exclude_tile", + "alternatives" : [ + "-e" + ], + "description" : "Regex matching tile names to exclude. This flag can be specified multiple times. (e.g. L1.*C0[23]S.)\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--filter_mask", + "description" : "Run parameters override, custom pass filter mask.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--flowcell_id", + "description" : "Run parameters override, flowcell ID.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--force_index_orientation", + "description" : "Do not attempt to find orientation for I1/I2 reads (reverse complement).\nUse orientation given in run manifest.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--group_fastq", + "description" : "Group all FASTQ/stats/metrics for a project are in the project folder.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--i1_cycles", + "description" : "Run parameters override, I1 cycles.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--i2_cycles", + "description" : "Run parameters override, I2 cycles\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--include_tile", + "alternatives" : [ + "-i" + ], + "description" : "Regex matching tile names to include. This flag\ncan be specified multiple times. (e.g. L1.*C0[23]S.)\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--kit_configuration", + "description" : "Run parameters override, kit configuration.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--legacy_fastq", + "description" : "Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--log_level", + "alternatives" : [ + "-l" + ], + "description" : "Severity level for logging.\n", + "example" : [ + "INFO" + ], + "required" : false, + "choices" : [ + "DEBUG", + "INFO", + "WARNING", + "ERROR" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--no_error_on_invalid", + "description" : "Skip invalid files and continue execution. Overridden by --strict options\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_projects", + "description" : "Disable project directories\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_unassigned", + "description" : "Max Number of unassigned sequences to report.\n", + "example" : [ + 30 + ], + "required" : false, + "min" : 0, + "max" : 1000, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--preparation_workflow", + "description" : "Run parameters override, preparation workflow. \n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--qc_only", + "description" : "Quickly generate run stats for single tile without generating FASTQ.\nUse --include_tile/--exclude_tile to define custom tile set.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--r1_cycles", + "description" : "Run parameters override, R1 cycles.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--r2_cycles", + "description" : "Run parameters override, R2 cycles.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--split_lanes", + "description" : "Split FASTQ files by lane.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--strict", + "description" : "In strict mode any invalid or missing input file will terminate execution \n(overrides no_error_on_invalid and sets --error_on_missing)\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences instruments and converts base calls into FASTQ files.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "demultiplex", + "fastq", + "demux", + "Element Biosciences" + ], + "license" : "Proprietairy", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "documentation" : "https://docs.elembio.io/docs/bases2fastq/introduction/" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "elembio/bases2fastq:2.1.0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "tree" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "echo \\"bases2fastq: $(bases2fastq --version | cut -d' ' -f3)\\" > /var/software_versions.txt\n" + ] + } + ], + "test_setup" : [ + { + "type" : "apt", + "packages" : [ + "curl" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/bases2fastq/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/bases2fastq", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ANALYSIS_DIRECTORY+x} ]; then echo "${VIASH_PAR_ANALYSIS_DIRECTORY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_analysis_directory='&'#" ; else echo "# par_analysis_directory="; fi ) +$( if [ ! -z ${VIASH_PAR_RUN_MANIFEST+x} ]; then echo "${VIASH_PAR_RUN_MANIFEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_manifest='&'#" ; else echo "# par_run_manifest="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIRECTORY+x} ]; then echo "${VIASH_PAR_OUTPUT_DIRECTORY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_directory='&'#" ; else echo "# par_output_directory="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_CHEMISTRY_VERSION+x} ]; then echo "${VIASH_PAR_CHEMISTRY_VERSION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chemistry_version='&'#" ; else echo "# par_chemistry_version="; fi ) +$( if [ ! -z ${VIASH_PAR_DEMUX_ONLY+x} ]; then echo "${VIASH_PAR_DEMUX_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_demux_only='&'#" ; else echo "# par_demux_only="; fi ) +$( if [ ! -z ${VIASH_PAR_DETECT_ADAPTERS+x} ]; then echo "${VIASH_PAR_DETECT_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_detect_adapters='&'#" ; else echo "# par_detect_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR_ON_MISSING+x} ]; then echo "${VIASH_PAR_ERROR_ON_MISSING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error_on_missing='&'#" ; else echo "# par_error_on_missing="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE_TILE+x} ]; then echo "${VIASH_PAR_EXCLUDE_TILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude_tile='&'#" ; else echo "# par_exclude_tile="; fi ) +$( if [ ! -z ${VIASH_PAR_FILTER_MASK+x} ]; then echo "${VIASH_PAR_FILTER_MASK}" | sed "s#'#'\\"'\\"'#g;s#.*#par_filter_mask='&'#" ; else echo "# par_filter_mask="; fi ) +$( if [ ! -z ${VIASH_PAR_FLOWCELL_ID+x} ]; then echo "${VIASH_PAR_FLOWCELL_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_flowcell_id='&'#" ; else echo "# par_flowcell_id="; fi ) +$( if [ ! -z ${VIASH_PAR_FORCE_INDEX_ORIENTATION+x} ]; then echo "${VIASH_PAR_FORCE_INDEX_ORIENTATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_force_index_orientation='&'#" ; else echo "# par_force_index_orientation="; fi ) +$( if [ ! -z ${VIASH_PAR_GROUP_FASTQ+x} ]; then echo "${VIASH_PAR_GROUP_FASTQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_group_fastq='&'#" ; else echo "# par_group_fastq="; fi ) +$( if [ ! -z ${VIASH_PAR_I1_CYCLES+x} ]; then echo "${VIASH_PAR_I1_CYCLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_i1_cycles='&'#" ; else echo "# par_i1_cycles="; fi ) +$( if [ ! -z ${VIASH_PAR_I2_CYCLES+x} ]; then echo "${VIASH_PAR_I2_CYCLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_i2_cycles='&'#" ; else echo "# par_i2_cycles="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_TILE+x} ]; then echo "${VIASH_PAR_INCLUDE_TILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_tile='&'#" ; else echo "# par_include_tile="; fi ) +$( if [ ! -z ${VIASH_PAR_KIT_CONFIGURATION+x} ]; then echo "${VIASH_PAR_KIT_CONFIGURATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kit_configuration='&'#" ; else echo "# par_kit_configuration="; fi ) +$( if [ ! -z ${VIASH_PAR_LEGACY_FASTQ+x} ]; then echo "${VIASH_PAR_LEGACY_FASTQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_legacy_fastq='&'#" ; else echo "# par_legacy_fastq="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG_LEVEL+x} ]; then echo "${VIASH_PAR_LOG_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log_level='&'#" ; else echo "# par_log_level="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_ERROR_ON_INVALID+x} ]; then echo "${VIASH_PAR_NO_ERROR_ON_INVALID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_error_on_invalid='&'#" ; else echo "# par_no_error_on_invalid="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_PROJECTS+x} ]; then echo "${VIASH_PAR_NO_PROJECTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_projects='&'#" ; else echo "# par_no_projects="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_UNASSIGNED+x} ]; then echo "${VIASH_PAR_NUM_UNASSIGNED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_unassigned='&'#" ; else echo "# par_num_unassigned="; fi ) +$( if [ ! -z ${VIASH_PAR_PREPARATION_WORKFLOW+x} ]; then echo "${VIASH_PAR_PREPARATION_WORKFLOW}" | sed "s#'#'\\"'\\"'#g;s#.*#par_preparation_workflow='&'#" ; else echo "# par_preparation_workflow="; fi ) +$( if [ ! -z ${VIASH_PAR_QC_ONLY+x} ]; then echo "${VIASH_PAR_QC_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qc_only='&'#" ; else echo "# par_qc_only="; fi ) +$( if [ ! -z ${VIASH_PAR_R1_CYCLES+x} ]; then echo "${VIASH_PAR_R1_CYCLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_r1_cycles='&'#" ; else echo "# par_r1_cycles="; fi ) +$( if [ ! -z ${VIASH_PAR_R2_CYCLES+x} ]; then echo "${VIASH_PAR_R2_CYCLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_r2_cycles='&'#" ; else echo "# par_r2_cycles="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT_LANES+x} ]; then echo "${VIASH_PAR_SPLIT_LANES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split_lanes='&'#" ; else echo "# par_split_lanes="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT+x} ]; then echo "${VIASH_PAR_STRICT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict='&'#" ; else echo "# par_strict="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# Exit on error +set -eo pipefail + +# Unset parameters +unset_if_false=( + par_demux_only + par_detect_adapters + par_error_on_missing + par_group_fastq + par_legacy_fastq + par_no_error_on_invalid + par_no_projects + par_qc_only + par_split_lanes + par_skip_qc_report + par_strict + par_force_index_orientation +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# NOTE: --preparation-workflow is bugged in bases2fastq +args=( + \\${par_demux_only:+--demux-only} + \\${par_detect_adapters:+--detect-adapters} + \\${par_error_on_missing:+--error-on-missing} + \\${par_group_fastq:+--group-fastq} + \\${par_legacy_fastq:+--legacy-fastq} + \\${par_no_error_on_invalid:+--no-error-on-invalid} + \\${par_no_projects:+--no-projects} + \\${par_split_lanes:+--split-lanes} + \\${par_strict:+--strict} + \\${par_force_index_orientation:+--force-index-orientation} + \\${par_chemistry_version:+--chemistry-version "\\$par_chemistry_version"} + \\${par_filter_mask:+--filter-mask "\\$par_filter_mask"} + \\${par_flowcell_id:+--flowcell-id "\\$par_flowcell_id"} + \\${par_i1_cycles:+--i1-cycles "\\$par_i1_cycles"} + \\${par_i2_cycles:+--i2-cycles "\\$par_i2_cycles"} + \\${par_r1_cycles:+--r1-cycles "\\$par_r1_cycles"} + \\${par_r2_cycles:+--r2-cycles "\\$par_r2_cycles"} + \\${par_kit_configuration:+--kit-configuration "\\$par_kit_configuration"} + \\${par_log_level:+--log-level "\\$par_log_level"} + \\${par_num_unassigned:+--num-unassigned "\\$par_num_unassigned"} + \\${par_preparation_workflow:+--preparation-workflow "\\$par_preparation_workflow"} + \\${meta_cpus:+--num-threads "\\$meta_cpus"} + \\${par_run_manifest:+--run-manifest "\\$par_run_manifest"} +) + +# Create arrays for inputs that contain multiple arguments +IFS=";" read -ra exclude_tile <<< "\\$par_exclude_tile" +IFS=";" read -ra include_tile <<< "\\$par_include_tile" + +if [ -z "\\$par_report" ]; then + args+=( --skip-qc-report ) +fi + +for arg_value in "\\${exclude_tile[@]}"; do + args+=( "--exclude-tile" "\\$arg_value" ) +done + +for arg_value in "\\${include_tile[@]}"; do + args+=( "--include-tile" "\\$arg_value" ) +done + +echo "> Creating temporary directory." +# create temporary directory and clean up on exit +TMPDIR=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXX") +echo "> Created \\$TMPDIR" +function clean_up { + [[ -d "\\$TMPDIR" ]] && rm -rf "\\$TMPDIR" +} +trap clean_up EXIT + +args+=( "\\$par_analysis_directory" "\\$TMPDIR") +echo "> Running bases2fastq with arguments: \\${args[@]}" +bases2fastq \\${args[@]} +echo "> Done running sgdemux" + +echo "> Output folder:" +tree "\\$TMPDIR" + +echo "> Moving FASTQ files into final output directory" +mkdir -p "\\$par_output_directory/" +mv "\\$TMPDIR"/Samples/* --target-directory="\\$par_output_directory" + +if [ ! -z "\\$par_report" ]; then + echo "> Moving HTML report to the output (\\$par_report)" + mv "\\$TMPDIR"/*.html "\\$par_report" +else + echo " > Leaving reports alone" +fi + +# Logs is everything else +if [ ! -z "\\$par_logs" ]; then + mkdir -p "\\$par_logs" + echo "> Moving logs to their own location (\\$par_logs)" + mv "\\$TMPDIR/"* "\\$par_logs/" +else + echo "> Not moving logs" +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/bases2fastq", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow.config new file mode 100644 index 0000000..1b9cc0b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'bases2fastq' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Bases2Fastq demultiplexes sequencing data generated by Element Biosciences instruments and converts base calls into FASTQ files.\n' + author = 'Dries Schaumont' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow_schema.json new file mode 100644 index 0000000..d59d797 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/nextflow_schema.json @@ -0,0 +1,394 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bases2fastq", +"description": "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences instruments and converts base calls into FASTQ files.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "chemistry_version": { + "type": + "string", + "description": "Type: `string`. Run parameters override, chemistry version", + "help_text": "Type: `string`. Run parameters override, chemistry version." + + } + + + , + "demux_only": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate demux files and indexing stats without generating FASTQ\n", + "help_text": "Type: `boolean_true`, default: `false`. Generate demux files and indexing stats without generating FASTQ\n" + , + "default":false + } + + + , + "detect_adapters": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Detect adapters sequences, overriding any sequences present in run manifest", + "help_text": "Type: `boolean_true`, default: `false`. Detect adapters sequences, overriding any sequences present in run manifest.\n" + , + "default":false + } + + + , + "error_on_missing": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Terminate execution for a missing file (by default, missing files are\nskipped and execution continues)", + "help_text": "Type: `boolean_true`, default: `false`. Terminate execution for a missing file (by default, missing files are\nskipped and execution continues). Also set by --strict.\n" + , + "default":false + } + + + , + "exclude_tile": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Regex matching tile names to exclude", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Regex matching tile names to exclude. This flag can be specified multiple times. (e.g. L1.*C0[23]S.)\n" + + } + + + , + "filter_mask": { + "type": + "string", + "description": "Type: `string`. Run parameters override, custom pass filter mask", + "help_text": "Type: `string`. Run parameters override, custom pass filter mask.\n" + + } + + + , + "flowcell_id": { + "type": + "string", + "description": "Type: `string`. Run parameters override, flowcell ID", + "help_text": "Type: `string`. Run parameters override, flowcell ID.\n" + + } + + + , + "force_index_orientation": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not attempt to find orientation for I1/I2 reads (reverse complement)", + "help_text": "Type: `boolean_true`, default: `false`. Do not attempt to find orientation for I1/I2 reads (reverse complement).\nUse orientation given in run manifest.\n" + , + "default":false + } + + + , + "group_fastq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Group all FASTQ/stats/metrics for a project are in the project folder", + "help_text": "Type: `boolean_true`, default: `false`. Group all FASTQ/stats/metrics for a project are in the project folder.\n" + , + "default":false + } + + + , + "i1_cycles": { + "type": + "integer", + "description": "Type: `integer`. Run parameters override, I1 cycles", + "help_text": "Type: `integer`. Run parameters override, I1 cycles.\n" + + } + + + , + "i2_cycles": { + "type": + "integer", + "description": "Type: `integer`. Run parameters override, I2 cycles\n", + "help_text": "Type: `integer`. Run parameters override, I2 cycles\n" + + } + + + , + "include_tile": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Regex matching tile names to include", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Regex matching tile names to include. This flag\ncan be specified multiple times. (e.g. L1.*C0[23]S.)\n" + + } + + + , + "kit_configuration": { + "type": + "string", + "description": "Type: `string`. Run parameters override, kit configuration", + "help_text": "Type: `string`. Run parameters override, kit configuration.\n" + + } + + + , + "legacy_fastq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Legacy naming for FASTQ files (e", + "help_text": "Type: `boolean_true`, default: `false`. Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)\n" + , + "default":false + } + + + , + "log_level": { + "type": + "string", + "description": "Type: `string`, example: `INFO`, choices: ``DEBUG`, `INFO`, `WARNING`, `ERROR``. Severity level for logging", + "help_text": "Type: `string`, example: `INFO`, choices: ``DEBUG`, `INFO`, `WARNING`, `ERROR``. Severity level for logging.\n", + "enum": ["DEBUG", "INFO", "WARNING", "ERROR"] + + + } + + + , + "no_error_on_invalid": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Skip invalid files and continue execution", + "help_text": "Type: `boolean_true`, default: `false`. Skip invalid files and continue execution. Overridden by --strict options\n" + , + "default":false + } + + + , + "no_projects": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable project directories\n", + "help_text": "Type: `boolean_true`, default: `false`. Disable project directories\n" + , + "default":false + } + + + , + "num_unassigned": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. Max Number of unassigned sequences to report", + "help_text": "Type: `integer`, example: `30`. Max Number of unassigned sequences to report.\n" + + } + + + , + "preparation_workflow": { + "type": + "string", + "description": "Type: `string`. Run parameters override, preparation workflow", + "help_text": "Type: `string`. Run parameters override, preparation workflow. \n" + + } + + + , + "qc_only": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Quickly generate run stats for single tile without generating FASTQ", + "help_text": "Type: `boolean_true`, default: `false`. Quickly generate run stats for single tile without generating FASTQ.\nUse --include_tile/--exclude_tile to define custom tile set.\n" + , + "default":false + } + + + , + "r1_cycles": { + "type": + "integer", + "description": "Type: `integer`. Run parameters override, R1 cycles", + "help_text": "Type: `integer`. Run parameters override, R1 cycles.\n" + + } + + + , + "r2_cycles": { + "type": + "integer", + "description": "Type: `integer`. Run parameters override, R2 cycles", + "help_text": "Type: `integer`. Run parameters override, R2 cycles.\n" + + } + + + , + "split_lanes": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Split FASTQ files by lane", + "help_text": "Type: `boolean_true`, default: `false`. Split FASTQ files by lane.\n" + , + "default":false + } + + + , + "strict": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In strict mode any invalid or missing input file will terminate execution \n(overrides no_error_on_invalid and sets --error_on_missing)\n", + "help_text": "Type: `boolean_true`, default: `false`. In strict mode any invalid or missing input file will terminate execution \n(overrides no_error_on_invalid and sets --error_on_missing)\n" + , + "default":false + } + + +} +}, + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "analysis_directory": { + "type": + "string", + "description": "Type: `file`, required, example: `input`. Location of analysis directory", + "help_text": "Type: `file`, required, example: `input`. Location of analysis directory" + + } + + + , + "run_manifest": { + "type": + "string", + "description": "Type: `file`. Location of run manifest to use instead of default RunManifest", + "help_text": "Type: `file`. Location of run manifest to use instead of default RunManifest.csv found in analysis directory" + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_directory": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_directory`, example: `fastq_dir`. Location to save output fastqs", + "help_text": "Type: `file`, required, default: `$id.$key.output_directory`, example: `fastq_dir`. Location to save output fastqs" + , + "default":"$id.$key.output_directory" + } + + + , + "report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.report`. Output location for the HTML report", + "help_text": "Type: `file`, default: `$id.$key.report`. Output location for the HTML report" + , + "default":"$id.$key.report" + } + + + , + "logs": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.logs`, example: `logs_dir`. Directory containing log files", + "help_text": "Type: `file`, default: `$id.$key.logs`, example: `logs_dir`. Directory containing log files" + , + "default":"$id.$key.logs" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/.config.vsh.yaml new file mode 100644 index 0000000..566d562 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/.config.vsh.yaml @@ -0,0 +1,469 @@ +name: "bcl_convert" +version: "v0.3.1" +authors: +- name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" +- name: "Dorien Roosen" + roles: + - "author" + info: + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--bcl_input_directory" + alternatives: + - "-i" + description: "Input run directory" + info: null + example: + - "bcl_dir" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_sheet" + alternatives: + - "-s" + description: "Path to SampleSheet.csv file (default searched for in --bcl_input_directory)" + info: null + example: + - "bcl_dir/sample_sheet.csv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_info" + description: "Path to RunInfo.xml file (default root of BCL input directory)" + info: null + example: + - "bcl_dir/RunInfo.xml" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Lane and tile settings" + arguments: + - type: "integer" + name: "--bcl_only_lane" + description: "Convert only specified lane number (default all lanes)" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--first_tile_only" + description: "Only convert first tile of input (for testing & debugging)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--tiles" + description: "Process only a subset of tiles by a regular expression" + info: null + example: + - "s_[0-9]+_1" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--exclude_tiles" + description: "Exclude set of tiles by a regular expression" + info: null + example: + - "s_[0-9]+_1" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Resource arguments" + arguments: + - type: "boolean" + name: "--shared_thread_odirect_output" + description: "Use linux native asynchronous io (io_submit) for file output (Default=false)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bcl_num_parallel_tiles" + description: "\\# of tiles to process in parallel (default 1)" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bcl_num_conversion_threads" + description: "\\# of threads for conversion (per tile, default # cpu threads)" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bcl_num_compression_threads" + description: "\\# of threads for fastq.gz output compression (per tile, default\ + \ # cpu threads, or HW+12)" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bcl_num_decompression_threads" + description: "\\# of threads for bcl/cbcl input decompression (per tile, default\ + \ half # cpu threads, or HW+8). Only applies when preloading files" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Run arguments" + arguments: + - type: "boolean" + name: "--bcl_only_matched_reads" + description: "For pure BCL conversion, do not output files for 'Undetermined'\ + \ [unmatched] reads (output by default)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--no_lane_splitting" + description: "Do not split FASTQ file by lane (false by default)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_unknown_barcodes_reported" + description: "\\# of Top Unknown Barcodes to output (1000 by default)" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bcl_validate_sample_sheet_only" + description: "Only validate RunInfo.xml & SampleSheet files (produce no FASTQ\ + \ files)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--strict_mode" + description: "Abort if any files are missing (false by default)" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--sample_name_column_enabled" + description: "Use sample sheet 'Sample_Name' column when naming fastq files &\ + \ subdirectories" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output_directory" + alternatives: + - "-o" + description: "Output directory containig fastq files" + info: null + example: + - "fastq_dir" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--bcl_sampleproject_subdirectories" + description: "Output to subdirectories based upon sample sheet 'Sample_Project'\ + \ column" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fastq_gzip_compression_level" + description: "Set fastq output compression level 0-9 (default 1)" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--reports" + description: "Reports directory" + info: null + example: + - "reports_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--logs" + description: "Reports directory" + info: null + example: + - "logs_dir" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--force" + description: "Allow destination directory to already exist and overwrite files.\n" + info: null + example: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\ + \ upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\n\ + and [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "demultiplex" +- "fastq" +- "bcl" +- "illumina" +license: "Proprietary" +links: + repository: "https://github.com/viash-hub/biobox" + homepage: "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html" + documentation: "https://support.illumina.com/downloads/bcl-convert-user-guide.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:trixie-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "wget" + - "gdb" + - "which" + - "hostname" + - "alien" + - "procps" + interactive: false + - type: "docker" + run: + - "wget https://s3.amazonaws.com/webdata.illumina.com/downloads/software/bcl-convert/bcl-convert-4.2.7-2.el8.x86_64.rpm\ + \ -O /tmp/bcl-convert.rpm && \\\nalien -i /tmp/bcl-convert.rpm && \\\nrm -rf\ + \ /var/lib/apt/lists/* && \\\nrm /tmp/bcl-convert.rpm\n" + - type: "docker" + run: + - "echo \"bcl-convert: \\\"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/\ + \ s/^bcl-convert\\ Version //p')\\\"\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bcl_convert/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bcl_convert" + executable: "target/nextflow/bcl_convert/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/main.nf new file mode 100644 index 0000000..a794d32 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/main.nf @@ -0,0 +1,4207 @@ +// bcl_convert v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Toni Verbeiren (author, maintainer) +// * Dorien Roosen (author) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "bcl_convert", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Toni Verbeiren", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "github" : "tverbeiren", + "linkedin" : "verbeiren" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist and CEO" + } + ] + } + }, + { + "name" : "Dorien Roosen", + "roles" : [ + "author" + ], + "info" : { + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--bcl_input_directory", + "alternatives" : [ + "-i" + ], + "description" : "Input run directory", + "example" : [ + "bcl_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_sheet", + "alternatives" : [ + "-s" + ], + "description" : "Path to SampleSheet.csv file (default searched for in --bcl_input_directory)", + "example" : [ + "bcl_dir/sample_sheet.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_info", + "description" : "Path to RunInfo.xml file (default root of BCL input directory)", + "example" : [ + "bcl_dir/RunInfo.xml" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Lane and tile settings", + "arguments" : [ + { + "type" : "integer", + "name" : "--bcl_only_lane", + "description" : "Convert only specified lane number (default all lanes)", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--first_tile_only", + "description" : "Only convert first tile of input (for testing & debugging)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--tiles", + "description" : "Process only a subset of tiles by a regular expression", + "example" : [ + "s_[0-9]+_1" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--exclude_tiles", + "description" : "Exclude set of tiles by a regular expression", + "example" : [ + "s_[0-9]+_1" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Resource arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--shared_thread_odirect_output", + "description" : "Use linux native asynchronous io (io_submit) for file output (Default=false)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bcl_num_parallel_tiles", + "description" : "\\\\# of tiles to process in parallel (default 1)", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bcl_num_conversion_threads", + "description" : "\\\\# of threads for conversion (per tile, default # cpu threads)", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bcl_num_compression_threads", + "description" : "\\\\# of threads for fastq.gz output compression (per tile, default # cpu threads, or HW+12)", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bcl_num_decompression_threads", + "description" : "\\\\# of threads for bcl/cbcl input decompression (per tile, default half # cpu threads, or HW+8). Only applies when preloading files", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Run arguments", + "arguments" : [ + { + "type" : "boolean", + "name" : "--bcl_only_matched_reads", + "description" : "For pure BCL conversion, do not output files for 'Undetermined' [unmatched] reads (output by default)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--no_lane_splitting", + "description" : "Do not split FASTQ file by lane (false by default)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_unknown_barcodes_reported", + "description" : "\\\\# of Top Unknown Barcodes to output (1000 by default)", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--bcl_validate_sample_sheet_only", + "description" : "Only validate RunInfo.xml & SampleSheet files (produce no FASTQ files)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--strict_mode", + "description" : "Abort if any files are missing (false by default)", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--sample_name_column_enabled", + "description" : "Use sample sheet 'Sample_Name' column when naming fastq files & subdirectories", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output_directory", + "alternatives" : [ + "-o" + ], + "description" : "Output directory containig fastq files", + "example" : [ + "fastq_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--bcl_sampleproject_subdirectories", + "description" : "Output to subdirectories based upon sample sheet 'Sample_Project' column", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fastq_gzip_compression_level", + "description" : "Set fastq output compression level 0-9 (default 1)", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reports", + "description" : "Reports directory", + "example" : [ + "reports_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--logs", + "description" : "Reports directory", + "example" : [ + "logs_dir" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--force", + "description" : "Allow destination directory to already exist and overwrite files.\n", + "example" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\nand [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "demultiplex", + "fastq", + "bcl", + "illumina" + ], + "license" : "Proprietary", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "homepage" : "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html", + "documentation" : "https://support.illumina.com/downloads/bcl-convert-user-guide.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:trixie-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "wget", + "gdb", + "which", + "hostname", + "alien", + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "wget https://s3.amazonaws.com/webdata.illumina.com/downloads/software/bcl-convert/bcl-convert-4.2.7-2.el8.x86_64.rpm -O /tmp/bcl-convert.rpm && \\\\\nalien -i /tmp/bcl-convert.rpm && \\\\\nrm -rf /var/lib/apt/lists/* && \\\\\nrm /tmp/bcl-convert.rpm\n" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"bcl-convert: \\\\\\"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/ s/^bcl-convert\\\\ Version //p')\\\\\\"\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/bcl_convert/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/bcl_convert", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_BCL_INPUT_DIRECTORY+x} ]; then echo "${VIASH_PAR_BCL_INPUT_DIRECTORY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_input_directory='&'#" ; else echo "# par_bcl_input_directory="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SHEET+x} ]; then echo "${VIASH_PAR_SAMPLE_SHEET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_sheet='&'#" ; else echo "# par_sample_sheet="; fi ) +$( if [ ! -z ${VIASH_PAR_RUN_INFO+x} ]; then echo "${VIASH_PAR_RUN_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_info='&'#" ; else echo "# par_run_info="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_ONLY_LANE+x} ]; then echo "${VIASH_PAR_BCL_ONLY_LANE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_only_lane='&'#" ; else echo "# par_bcl_only_lane="; fi ) +$( if [ ! -z ${VIASH_PAR_FIRST_TILE_ONLY+x} ]; then echo "${VIASH_PAR_FIRST_TILE_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_first_tile_only='&'#" ; else echo "# par_first_tile_only="; fi ) +$( if [ ! -z ${VIASH_PAR_TILES+x} ]; then echo "${VIASH_PAR_TILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tiles='&'#" ; else echo "# par_tiles="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE_TILES+x} ]; then echo "${VIASH_PAR_EXCLUDE_TILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude_tiles='&'#" ; else echo "# par_exclude_tiles="; fi ) +$( if [ ! -z ${VIASH_PAR_SHARED_THREAD_ODIRECT_OUTPUT+x} ]; then echo "${VIASH_PAR_SHARED_THREAD_ODIRECT_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_shared_thread_odirect_output='&'#" ; else echo "# par_shared_thread_odirect_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_NUM_PARALLEL_TILES+x} ]; then echo "${VIASH_PAR_BCL_NUM_PARALLEL_TILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_num_parallel_tiles='&'#" ; else echo "# par_bcl_num_parallel_tiles="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_NUM_CONVERSION_THREADS+x} ]; then echo "${VIASH_PAR_BCL_NUM_CONVERSION_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_num_conversion_threads='&'#" ; else echo "# par_bcl_num_conversion_threads="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_NUM_COMPRESSION_THREADS+x} ]; then echo "${VIASH_PAR_BCL_NUM_COMPRESSION_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_num_compression_threads='&'#" ; else echo "# par_bcl_num_compression_threads="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_NUM_DECOMPRESSION_THREADS+x} ]; then echo "${VIASH_PAR_BCL_NUM_DECOMPRESSION_THREADS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_num_decompression_threads='&'#" ; else echo "# par_bcl_num_decompression_threads="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_ONLY_MATCHED_READS+x} ]; then echo "${VIASH_PAR_BCL_ONLY_MATCHED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_only_matched_reads='&'#" ; else echo "# par_bcl_only_matched_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_LANE_SPLITTING+x} ]; then echo "${VIASH_PAR_NO_LANE_SPLITTING}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_lane_splitting='&'#" ; else echo "# par_no_lane_splitting="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_UNKNOWN_BARCODES_REPORTED+x} ]; then echo "${VIASH_PAR_NUM_UNKNOWN_BARCODES_REPORTED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_unknown_barcodes_reported='&'#" ; else echo "# par_num_unknown_barcodes_reported="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_VALIDATE_SAMPLE_SHEET_ONLY+x} ]; then echo "${VIASH_PAR_BCL_VALIDATE_SAMPLE_SHEET_ONLY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_validate_sample_sheet_only='&'#" ; else echo "# par_bcl_validate_sample_sheet_only="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT_MODE+x} ]; then echo "${VIASH_PAR_STRICT_MODE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict_mode='&'#" ; else echo "# par_strict_mode="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_NAME_COLUMN_ENABLED+x} ]; then echo "${VIASH_PAR_SAMPLE_NAME_COLUMN_ENABLED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_name_column_enabled='&'#" ; else echo "# par_sample_name_column_enabled="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIRECTORY+x} ]; then echo "${VIASH_PAR_OUTPUT_DIRECTORY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_directory='&'#" ; else echo "# par_output_directory="; fi ) +$( if [ ! -z ${VIASH_PAR_BCL_SAMPLEPROJECT_SUBDIRECTORIES+x} ]; then echo "${VIASH_PAR_BCL_SAMPLEPROJECT_SUBDIRECTORIES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bcl_sampleproject_subdirectories='&'#" ; else echo "# par_bcl_sampleproject_subdirectories="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL+x} ]; then echo "${VIASH_PAR_FASTQ_GZIP_COMPRESSION_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_gzip_compression_level='&'#" ; else echo "# par_fastq_gzip_compression_level="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORTS+x} ]; then echo "${VIASH_PAR_REPORTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reports='&'#" ; else echo "# par_reports="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_FORCE+x} ]; then echo "${VIASH_PAR_FORCE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_force='&'#" ; else echo "# par_force="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +[[ "\\$par_force" == "false" ]] && unset par_force + + +\\$(which bcl-convert) \\\\ + --bcl-input-directory "\\$par_bcl_input_directory" \\\\ + --output-directory "\\$par_output_directory" \\\\ + \\${par_force:+--force} \\\\ + \\${par_sample_sheet:+ --sample-sheet "\\$par_sample_sheet"} \\\\ + \\${par_run_info:+ --run-info "\\$par_run_info"} \\\\ + \\${par_bcl_only_lane:+ --bcl-only-lane "\\$par_bcl_only_lane"} \\\\ + \\${par_first_tile_only:+ --first-tile-only "\\$par_first_tile_only"} \\\\ + \\${par_tiles:+ --tiles "\\$par_tiles"} \\\\ + \\${par_exclude_tiles:+ --exclude-tiles "\\$par_exclude_tiles"} \\\\ + \\${par_shared_thread_odirect_output:+ --shared-thread-odirect-output "\\$par_shared_thread_odirect_output"} \\\\ + \\${par_bcl_num_parallel_tiles:+ --bcl-num-parallel-tiles "\\$par_bcl_num_parallel_tiles"} \\\\ + \\${par_bcl_num_conversion_threads:+ --bcl-num-conversion-threads "\\$par_bcl_num_conversion_threads"} \\\\ + \\${par_bcl_num_compression_threads:+ --bcl-num-compression-threads "\\$par_bcl_num_compression_threads"} \\\\ + \\${par_bcl_num_decompression_threads:+ --bcl-num-decompression-threads "\\$par_bcl_num_decompression_threads"} \\\\ + \\${par_bcl_only_matched_reads:+ --bcl-only-matched-reads "\\$par_bcl_only_matched_reads"} \\\\ + \\${par_no_lane_splitting:+ --no-lane-splitting "\\$par_no_lane_splitting"} \\\\ + \\${par_num_unknown_barcodes_reported:+ --num-unknown-barcodes-reported "\\$par_num_unknown_barcodes_reported"} \\\\ + \\${par_bcl_validate_sample_sheet_only:+ --bcl-validate-sample-sheet-only "\\$par_bcl_validate_sample_sheet_only"} \\\\ + \\${par_strict_mode:+ --strict-mode "\\$par_strict_mode"} \\\\ + \\${par_sample_name_column_enabled:+ --sample-name-column-enabled "\\$par_sample_name_column_enabled"} \\\\ + \\${par_bcl_sampleproject_subdirectories:+ --bcl-sampleproject-subdirectories "\\$par_bcl_sampleproject_subdirectories"} \\\\ + \\${par_fastq_gzip_compression_level:+ --fastq-gzip-compression-level "\\$par_fastq_gzip_compression_level"} + +if [ ! -z "\\$par_reports" ]; then + echo "Moving reports to their own location" + mv "\\${par_output_directory}/Reports" "\\$par_reports" +else + echo "Leaving reports alone" +fi + +if [ ! -z "\\$par_logs" ]; then + echo "Moving logs to their own location" + mv "\\${par_output_directory}/Logs" "\\$par_logs" +else + echo "Leaving logs alone" +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/bcl_convert", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow.config new file mode 100644 index 0000000..4dc4994 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'bcl_convert' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\nand [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n' + author = 'Toni Verbeiren, Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow_schema.json new file mode 100644 index 0000000..9927d09 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/nextflow_schema.json @@ -0,0 +1,359 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bcl_convert", +"description": "Convert bcl files to fastq files using bcl-convert.\nInformation about upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\nand [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n", +"type": "object", +"definitions": { + + + + "input arguments" : { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + + + "bcl_input_directory": { + "type": + "string", + "description": "Type: `file`, required, example: `bcl_dir`. Input run directory", + "help_text": "Type: `file`, required, example: `bcl_dir`. Input run directory" + + } + + + , + "sample_sheet": { + "type": + "string", + "description": "Type: `file`, example: `bcl_dir/sample_sheet.csv`. Path to SampleSheet", + "help_text": "Type: `file`, example: `bcl_dir/sample_sheet.csv`. Path to SampleSheet.csv file (default searched for in --bcl_input_directory)" + + } + + + , + "run_info": { + "type": + "string", + "description": "Type: `file`, example: `bcl_dir/RunInfo.xml`. Path to RunInfo", + "help_text": "Type: `file`, example: `bcl_dir/RunInfo.xml`. Path to RunInfo.xml file (default root of BCL input directory)" + + } + + +} +}, + + + "lane and tile settings" : { + "title": "Lane and tile settings", + "type": "object", + "description": "No description", + "properties": { + + + "bcl_only_lane": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Convert only specified lane number (default all lanes)", + "help_text": "Type: `integer`, example: `1`. Convert only specified lane number (default all lanes)" + + } + + + , + "first_tile_only": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Only convert first tile of input (for testing \u0026 debugging)", + "help_text": "Type: `boolean`, example: `true`. Only convert first tile of input (for testing \u0026 debugging)" + + } + + + , + "tiles": { + "type": + "string", + "description": "Type: `string`, example: `s_[0-9]+_1`. Process only a subset of tiles by a regular expression", + "help_text": "Type: `string`, example: `s_[0-9]+_1`. Process only a subset of tiles by a regular expression" + + } + + + , + "exclude_tiles": { + "type": + "string", + "description": "Type: `string`, example: `s_[0-9]+_1`. Exclude set of tiles by a regular expression", + "help_text": "Type: `string`, example: `s_[0-9]+_1`. Exclude set of tiles by a regular expression" + + } + + +} +}, + + + "resource arguments" : { + "title": "Resource arguments", + "type": "object", + "description": "No description", + "properties": { + + + "shared_thread_odirect_output": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Use linux native asynchronous io (io_submit) for file output (Default=false)", + "help_text": "Type: `boolean`, example: `true`. Use linux native asynchronous io (io_submit) for file output (Default=false)" + + } + + + , + "bcl_num_parallel_tiles": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. \\# of tiles to process in parallel (default 1)", + "help_text": "Type: `integer`, example: `1`. \\# of tiles to process in parallel (default 1)" + + } + + + , + "bcl_num_conversion_threads": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. \\# of threads for conversion (per tile, default # cpu threads)", + "help_text": "Type: `integer`, example: `1`. \\# of threads for conversion (per tile, default # cpu threads)" + + } + + + , + "bcl_num_compression_threads": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. \\# of threads for fastq", + "help_text": "Type: `integer`, example: `1`. \\# of threads for fastq.gz output compression (per tile, default # cpu threads, or HW+12)" + + } + + + , + "bcl_num_decompression_threads": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. \\# of threads for bcl/cbcl input decompression (per tile, default half # cpu threads, or HW+8)", + "help_text": "Type: `integer`, example: `1`. \\# of threads for bcl/cbcl input decompression (per tile, default half # cpu threads, or HW+8). Only applies when preloading files" + + } + + +} +}, + + + "run arguments" : { + "title": "Run arguments", + "type": "object", + "description": "No description", + "properties": { + + + "bcl_only_matched_reads": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. For pure BCL conversion, do not output files for \u0027Undetermined\u0027 [unmatched] reads (output by default)", + "help_text": "Type: `boolean`, example: `true`. For pure BCL conversion, do not output files for \u0027Undetermined\u0027 [unmatched] reads (output by default)" + + } + + + , + "no_lane_splitting": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Do not split FASTQ file by lane (false by default)", + "help_text": "Type: `boolean`, example: `true`. Do not split FASTQ file by lane (false by default)" + + } + + + , + "num_unknown_barcodes_reported": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. \\# of Top Unknown Barcodes to output (1000 by default)", + "help_text": "Type: `integer`, example: `1000`. \\# of Top Unknown Barcodes to output (1000 by default)" + + } + + + , + "bcl_validate_sample_sheet_only": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Only validate RunInfo", + "help_text": "Type: `boolean`, example: `true`. Only validate RunInfo.xml \u0026 SampleSheet files (produce no FASTQ files)" + + } + + + , + "strict_mode": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Abort if any files are missing (false by default)", + "help_text": "Type: `boolean`, example: `true`. Abort if any files are missing (false by default)" + + } + + + , + "sample_name_column_enabled": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Use sample sheet \u0027Sample_Name\u0027 column when naming fastq files \u0026 subdirectories", + "help_text": "Type: `boolean`, example: `true`. Use sample sheet \u0027Sample_Name\u0027 column when naming fastq files \u0026 subdirectories" + + } + + +} +}, + + + "output arguments" : { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "output_directory": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_directory`, example: `fastq_dir`. Output directory containig fastq files", + "help_text": "Type: `file`, required, default: `$id.$key.output_directory`, example: `fastq_dir`. Output directory containig fastq files" + , + "default":"$id.$key.output_directory" + } + + + , + "bcl_sampleproject_subdirectories": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Output to subdirectories based upon sample sheet \u0027Sample_Project\u0027 column", + "help_text": "Type: `boolean`, example: `true`. Output to subdirectories based upon sample sheet \u0027Sample_Project\u0027 column" + + } + + + , + "fastq_gzip_compression_level": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Set fastq output compression level 0-9 (default 1)", + "help_text": "Type: `integer`, example: `1`. Set fastq output compression level 0-9 (default 1)" + + } + + + , + "reports": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.reports`, example: `reports_dir`. Reports directory", + "help_text": "Type: `file`, default: `$id.$key.reports`, example: `reports_dir`. Reports directory" + , + "default":"$id.$key.reports" + } + + + , + "logs": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.logs`, example: `logs_dir`. Reports directory", + "help_text": "Type: `file`, default: `$id.$key.logs`, example: `logs_dir`. Reports directory" + , + "default":"$id.$key.logs" + } + + + , + "force": { + "type": + "boolean", + "description": "Type: `boolean`, example: `true`. Allow destination directory to already exist and overwrite files", + "help_text": "Type: `boolean`, example: `true`. Allow destination directory to already exist and overwrite files.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input arguments" + }, + + { + "$ref": "#/definitions/lane and tile settings" + }, + + { + "$ref": "#/definitions/resource arguments" + }, + + { + "$ref": "#/definitions/run arguments" + }, + + { + "$ref": "#/definitions/output arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/.config.vsh.yaml new file mode 100644 index 0000000..bb6131b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/.config.vsh.yaml @@ -0,0 +1,358 @@ +name: "falco" +version: "v0.3.1" +authors: +- name: "Toni Verbeiren" + roles: + - "author" + - "maintainer" + info: + links: + github: "tverbeiren" + linkedin: "verbeiren" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist and CEO" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "input fastq files" + info: null + example: + - "input1.fastq;input2.fastq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Run arguments" + arguments: + - type: "boolean_true" + name: "--nogroup" + description: "Disable grouping of bases for reads >50bp. \nAll reports will show\ + \ data for every base in \nthe read. WARNING: When using this option, \nyour\ + \ plots may end up a ridiculous size. You \nhave been warned!\n" + info: null + direction: "input" + - type: "file" + name: "--contaminents" + description: "Specifies a non-default file which contains \nthe list of contaminants\ + \ to screen \noverrepresented sequences against. The file \nmust contain sets\ + \ of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith\ + \ a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--adapters" + description: "Specifies a non-default file which contains \nthe list of adapter\ + \ sequences which will be \nexplicity searched against the library. The \nfile\ + \ must contain sets of named adapters in \nthe form name[tab]sequence. Lines\ + \ prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--limits" + description: "Specifies a non-default file which contains \na set of criteria\ + \ which will be used to \ndetermine the warn/error limits for the \nvarious\ + \ modules. This file can also be used \nto selectively remove some modules from\ + \ the \noutput all together. The format needs to \nmirror the default limits.txt\ + \ file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--subsample" + alternatives: + - "-s" + description: "[Falco only] makes falco faster (but \npossibly less accurate) by\ + \ only processing \nreads that are a multiple of this value (using \n0-based\ + \ indexing to number reads).\n" + info: null + example: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bisulfite" + alternatives: + - "-b" + description: "[Falco only] reads are whole genome \nbisulfite sequencing, and\ + \ more Ts and fewer \nCs are therefore expected and will be \naccounted for\ + \ in base content.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--reverse_complement" + alternatives: + - "-r" + description: "[Falco only] The input is a \nreverse-complement. All modules will\ + \ be \ntested by swapping A/T and C/G\n" + info: null + direction: "input" +- name: "Output arguments" + arguments: + - type: "file" + name: "--outdir" + alternatives: + - "-o" + description: "Create all output files in the specified \noutput directory. FALCO-SPECIFIC:\ + \ If the \ndirectory does not exists, the program will \ncreate it.\n" + info: null + example: + - "output" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--format" + alternatives: + - "-f" + description: "Bypasses the normal sequence file format \ndetection and forces\ + \ the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped,\ + \ sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n" + info: null + required: false + choices: + - "bam" + - "sam" + - "bam_mapped" + - "sam_mapped" + - "fastq" + - "fq" + - "fastq.gz" + - "fq.gz" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--data_filename" + alternatives: + - "-D" + description: "[Falco only] Specify filename for FastQC \ndata output (TXT). If\ + \ not specified, it will \nbe called fastq_data.txt in either the input \nfile's\ + \ directory or the one specified in the \n--output flag. Only available when\ + \ running \nfalco with a single input.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--report_filename" + alternatives: + - "-R" + description: "[Falco only] Specify filename for FastQC \nreport output (HTML).\ + \ If not specified, it \nwill be called fastq_report.html in either \nthe input\ + \ file's directory or the one \nspecified in the --output flag. Only \navailable\ + \ when running falco with a single \ninput.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--summary_filename" + alternatives: + - "-S" + description: "[Falco only] Specify filename for the short \nsummary output (TXT).\ + \ If not specified, it \nwill be called fastq_report.html in either \nthe input\ + \ file's directory or the one \nspecified in the --output flag. Only \navailable\ + \ when running falco with a single \ninput.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "A C++ drop-in replacement of FastQC to assess the quality of sequence\ + \ read data" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +keywords: +- "qc" +- "fastqc" +- "sequencing" +license: "GPL-3.0" +references: + doi: + - "10.12688/f1000research.21142.2" +links: + repository: "https://github.com/smithlabcode/falco" + documentation: "https://falco.readthedocs.io/en/latest/" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:trixie-slim" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "wget" + - "build-essential" + - "g++" + - "zlib1g-dev" + - "procps" + interactive: false + - type: "docker" + run: + - "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz\ + \ -O /tmp/falco.tar.gz && \\\ncd /tmp && \\\ntar xvf falco.tar.gz && \\\ncd\ + \ falco-1.2.2 && \\\n./configure && \\\nmake all && \\\nmake install\n" + - type: "docker" + run: + - "echo \"falco: \\\"$(falco -v | sed -n 's/^falco //p')\\\"\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/falco/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/falco" + executable: "target/nextflow/falco/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/main.nf new file mode 100644 index 0000000..53f3022 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/main.nf @@ -0,0 +1,4005 @@ +// falco v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Toni Verbeiren (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "falco", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Toni Verbeiren", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "github" : "tverbeiren", + "linkedin" : "verbeiren" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist and CEO" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "input fastq files", + "example" : [ + "input1.fastq;input2.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Run arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--nogroup", + "description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--contaminents", + "description" : "Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--adapters", + "description" : "Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--limits", + "description" : "Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--subsample", + "alternatives" : [ + "-s" + ], + "description" : "[Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n", + "example" : [ + 10 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bisulfite", + "alternatives" : [ + "-b" + ], + "description" : "[Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--reverse_complement", + "alternatives" : [ + "-r" + ], + "description" : "[Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n", + "direction" : "input" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--outdir", + "alternatives" : [ + "-o" + ], + "description" : "Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n", + "example" : [ + "output" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--format", + "alternatives" : [ + "-f" + ], + "description" : "Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n", + "required" : false, + "choices" : [ + "bam", + "sam", + "bam_mapped", + "sam_mapped", + "fastq", + "fq", + "fastq.gz", + "fq.gz" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--data_filename", + "alternatives" : [ + "-D" + ], + "description" : "[Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile's directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--report_filename", + "alternatives" : [ + "-R" + ], + "description" : "[Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--summary_filename", + "alternatives" : [ + "-S" + ], + "description" : "[Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "A C++ drop-in replacement of FastQC to assess the quality of sequence read data", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "qc", + "fastqc", + "sequencing" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.12688/f1000research.21142.2" + ] + }, + "links" : { + "repository" : "https://github.com/smithlabcode/falco", + "documentation" : "https://falco.readthedocs.io/en/latest/" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:trixie-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "wget", + "build-essential", + "g++", + "zlib1g-dev", + "procps" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz -O /tmp/falco.tar.gz && \\\\\ncd /tmp && \\\\\ntar xvf falco.tar.gz && \\\\\ncd falco-1.2.2 && \\\\\n./configure && \\\\\nmake all && \\\\\nmake install\n" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"falco: \\\\\\"$(falco -v | sed -n 's/^falco //p')\\\\\\"\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/falco/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/falco", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_NOGROUP+x} ]; then echo "${VIASH_PAR_NOGROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nogroup='&'#" ; else echo "# par_nogroup="; fi ) +$( if [ ! -z ${VIASH_PAR_CONTAMINENTS+x} ]; then echo "${VIASH_PAR_CONTAMINENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminents='&'#" ; else echo "# par_contaminents="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTERS+x} ]; then echo "${VIASH_PAR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapters='&'#" ; else echo "# par_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_LIMITS+x} ]; then echo "${VIASH_PAR_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limits='&'#" ; else echo "# par_limits="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "${VIASH_PAR_SUBSAMPLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subsample='&'#" ; else echo "# par_subsample="; fi ) +$( if [ ! -z ${VIASH_PAR_BISULFITE+x} ]; then echo "${VIASH_PAR_BISULFITE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bisulfite='&'#" ; else echo "# par_bisulfite="; fi ) +$( if [ ! -z ${VIASH_PAR_REVERSE_COMPLEMENT+x} ]; then echo "${VIASH_PAR_REVERSE_COMPLEMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reverse_complement='&'#" ; else echo "# par_reverse_complement="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTDIR+x} ]; then echo "${VIASH_PAR_OUTDIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_outdir='&'#" ; else echo "# par_outdir="; fi ) +$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_FILENAME+x} ]; then echo "${VIASH_PAR_DATA_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_filename='&'#" ; else echo "# par_data_filename="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT_FILENAME+x} ]; then echo "${VIASH_PAR_REPORT_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report_filename='&'#" ; else echo "# par_report_filename="; fi ) +$( if [ ! -z ${VIASH_PAR_SUMMARY_FILENAME+x} ]; then echo "${VIASH_PAR_SUMMARY_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary_filename='&'#" ; else echo "# par_summary_filename="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +[[ "\\$par_nogroup" == "false" ]] && unset par_nogroup +[[ "\\$par_bisulfite" == "false" ]] && unset par_bisulfite +[[ "\\$par_reverse_complement" == "false" ]] && unset par_reverse_complement + +IFS=";" read -ra input <<< \\$par_input + +\\$(which falco) \\\\ + \\${par_nogroup:+--nogroup} \\\\ + \\${par_contaminants:+--contaminants "\\$par_contaminants"} \\\\ + \\${par_adapters:+--adapters "\\$par_adapters"} \\\\ + \\${par_limits:+--limits "\\$par_limits"} \\\\ + \\${par_subsample:+-subsample \\$par_subsample} \\\\ + \\${par_bisulfite:+-bisulfite} \\\\ + \\${par_reverse_complement:+-reverse-complement} \\\\ + \\${par_outdir:+--outdir "\\$par_outdir"} \\\\ + \\${par_format:+--format "\\$par_format"} \\\\ + \\${par_data_filename:+-data-filename "\\$par_data_filename"} \\\\ + \\${par_report_filename:+-report-filename "\\$par_report_filename"} \\\\ + \\${par_summary_filename:+-summary-filename "\\$par_summary_filename"} \\\\ + \\${input[*]} +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/falco", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow.config new file mode 100644 index 0000000..a6578d5 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'falco' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'A C++ drop-in replacement of FastQC to assess the quality of sequence read data' + author = 'Toni Verbeiren' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow_schema.json new file mode 100644 index 0000000..35d4cfa --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/nextflow_schema.json @@ -0,0 +1,227 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "falco", +"description": "A C++ drop-in replacement of FastQC to assess the quality of sequence read data", +"type": "object", +"definitions": { + + + + "input arguments" : { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\";\"`. input fastq files", + "help_text": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\";\"`. input fastq files" + + } + + +} +}, + + + "run arguments" : { + "title": "Run arguments", + "type": "object", + "description": "No description", + "properties": { + + + "nogroup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp", + "help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n" + , + "default":false + } + + + , + "contaminents": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against", + "help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n" + + } + + + , + "adapters": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library", + "help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n" + + } + + + , + "limits": { + "type": + "string", + "description": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules", + "help_text": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n" + + } + + + , + "subsample": { + "type": + "integer", + "description": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads)", + "help_text": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n" + + } + + + , + "bisulfite": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content", + "help_text": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n" + , + "default":false + } + + + , + "reverse_complement": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement", + "help_text": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n" + , + "default":false + } + + +} +}, + + + "output arguments" : { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + + + "outdir": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.outdir`, example: `output`. Create all output files in the specified \noutput directory", + "help_text": "Type: `file`, required, default: `$id.$key.outdir`, example: `output`. Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n" + , + "default":"$id.$key.outdir" + } + + + , + "format": { + "type": + "string", + "description": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format", + "help_text": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n", + "enum": ["bam", "sam", "bam_mapped", "sam_mapped", "fastq", "fq", "fastq.gz", "fq.gz"] + + + } + + + , + "data_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT)", + "help_text": "Type: `file`, default: `$id.$key.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile\u0027s directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n" + , + "default":"$id.$key.data_filename" + } + + + , + "report_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML)", + "help_text": "Type: `file`, default: `$id.$key.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n" + , + "default":"$id.$key.report_filename" + } + + + , + "summary_filename": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT)", + "help_text": "Type: `file`, default: `$id.$key.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n" + , + "default":"$id.$key.summary_filename" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input arguments" + }, + + { + "$ref": "#/definitions/run arguments" + }, + + { + "$ref": "#/definitions/output arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml new file mode 100644 index 0000000..a191eda --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/.config.vsh.yaml @@ -0,0 +1,497 @@ +name: "multiqc" +version: "v0.3.1" +authors: +- name: "Dorien Roosen" + roles: + - "author" + - "maintainer" + info: + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Input" + arguments: + - type: "file" + name: "--input" + description: "File paths to be searched for analysis results to be included in\ + \ the report.\n" + info: null + example: + - "data/results" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Ouput" + arguments: + - type: "file" + name: "--output_report" + description: "Filepath of the generated report.\n" + info: null + example: + - "multiqc_report.html" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_data" + description: "Output directory for parsed data files. If not provided, parsed\ + \ data will not be published.\n" + info: null + example: + - "multiqc_data" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_plots" + description: "Output directory for generated plots. If not provided, plots will\ + \ not be published.\n" + info: null + example: + - "multiqc_plots" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Modules and analyses to run" + arguments: + - type: "string" + name: "--include_modules" + description: "Use only these module" + info: null + example: + - "fastqc" + - "cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--exclude_modules" + description: "Do not use only these modules" + info: null + example: + - "fastqc" + - "cutadapt" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--ignore_analysis" + info: null + example: + - "run_one/*" + - "run_two/*" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--ignore_samples" + info: null + example: + - "sample_1*" + - "sample_3*" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--ignore_symlinks" + description: "Ignore symlinked directories and files" + info: null + direction: "input" +- name: "Sample name handling" + arguments: + - type: "boolean_true" + name: "--dirs" + description: "Prepend directory to sample names to avoid clashing filenames" + info: null + direction: "input" + - type: "integer" + name: "--dirs_depth" + description: "Prepend n directories to sample names. Negative number to take from\ + \ start of path." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--full_names" + description: "Do not clean the sample names (leave as full file name)" + info: null + direction: "input" + - type: "boolean_true" + name: "--fn_as_s_name" + description: "Use the log filename as the sample name" + info: null + direction: "input" + - type: "file" + name: "--replace_names" + description: "TSV file to rename sample names during report generation" + info: null + example: + - "replace_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Report Customisation" + arguments: + - type: "string" + name: "--title" + description: "Report title. Printed as page header, used for filename if not otherwise\ + \ specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--comment" + description: "Custom comment, will be printed at the top of the report.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--template" + description: "Report template to use.\n" + info: null + required: false + choices: + - "default" + - "gathered" + - "geo" + - "highcharts" + - "sections" + - "simple" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_names" + description: "TSV file containing alternative sample names for renaming buttons\ + \ in the report.\n" + info: null + example: + - "sample_names.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_filters" + description: "TSV file containing show/hide patterns for the report\n" + info: null + example: + - "sample_filters.tsv" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--custom_css_file" + description: "Custom CSS file to add to the final report\n" + info: null + example: + - "custom_style_sheet.css" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--profile_runtime" + description: "Add analysis of how long MultiQC takes to run to the report\n" + info: null + direction: "input" +- name: "MultiQC behaviour" + arguments: + - type: "boolean_true" + name: "--verbose" + description: "Increase output verbosity.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + description: "Only show log warnings\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--strict" + description: "Don't catch exceptions, run additional code checks to help development.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--development" + description: "Development mode. Do not compress and minimise JS, export uncompressed\ + \ plot data.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--require_logs" + description: "Require all explicitly requested modules to have log files. If not,\ + \ MultiQC will exit with an error.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_megaqc_upload" + description: "Don't upload generated report to MegaQC, even if MegaQC options\ + \ are found.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_ansi" + description: "Disable coloured log output.\n" + info: null + direction: "input" + - type: "string" + name: "--cl_config" + description: "YAML formatted string that allows to customize MultiQC behaviour\ + \ like input file detection.\n" + info: null + example: + - "qualimap_config: { general_stats_coverage: [20,40,200] }" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output format" + arguments: + - type: "boolean_true" + name: "--flat" + description: "Use only flat plots (static images).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--interactive" + description: "Use only interactive plots (in-browser Javascript).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--data_dir" + description: "Force the parsed data directory to be created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_data_dir" + description: "Prevent the parsed data directory from being created.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--zip_data_dir" + description: "Compress the data directory.\n" + info: null + direction: "input" + - type: "string" + name: "--data_format" + description: "Output parsed data in a different format than the default 'txt'.\n" + info: null + required: false + choices: + - "tsv" + - "csv" + - "json" + - "yaml" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--pdf" + description: "Creates PDF report with the 'simple' template. Requires Pandoc to\ + \ be installed.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "MultiQC aggregates results from bioinformatics analyses across many\ + \ samples into a single report.\nIt searches a given directory for analysis logs\ + \ and compiles a HTML report. It's a general use tool, perfect for summarising the\ + \ output from numerous bioinformatics tools.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: + keywords: + - "QC" + - "html report" + - "aggregate analysis" + links: + homepage: "https://multiqc.info/" + documentation: "https://multiqc.info/docs/" + repository: "https://github.com/MultiQC/MultiQC" + references: + doi: "10.1093/bioinformatics/btw354" + licence: "GPL v3 or later" +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/biobox" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" + target_registry: "images.viash-hub.com" + target_tag: "v0.3.1" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "multiqc --version | sed 's/multiqc, version\\s\\(.*\\)/multiqc: \"\\1\"/' >\ + \ /var/software_versions.txt\n" + test_setup: + - type: "apt" + packages: + - "jq" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/multiqc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/multiqc" + executable: "target/nextflow/multiqc/main.nf" + viash_version: "0.9.4" + git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.3.0-8-g98a5f3c" +package_config: + name: "biobox" + version: "v0.3.1" + summary: "A curated collection of high-quality, standalone bioinformatics components\ + \ built with [Viash](https://viash.io).\n" + description: "`biobox` offers a suite of reliable bioinformatics components, similar\ + \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\ + \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\ + \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\ + \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\ + \ Run components directly via the command line or seamlessly integrate them into\ + \ Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for components and parameters.\n * Full exposure of underlying tool arguments.\n\ + \ * Containerized (Docker) for dependency management and reproducibility.\n\ + \ * Unit tested for verified functionality.\n" + info: null + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf new file mode 100644 index 0000000..b1e672a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf @@ -0,0 +1,4336 @@ +// multiqc v0.3.1 +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "multiqc", + "version" : "v0.3.1", + "authors" : [ + { + "name" : "Dorien Roosen", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "File paths to be searched for analysis results to be included in the report.\n", + "example" : [ + "data/results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Ouput", + "arguments" : [ + { + "type" : "file", + "name" : "--output_report", + "description" : "Filepath of the generated report.\n", + "example" : [ + "multiqc_report.html" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_data", + "description" : "Output directory for parsed data files. If not provided, parsed data will not be published.\n", + "example" : [ + "multiqc_data" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_plots", + "description" : "Output directory for generated plots. If not provided, plots will not be published.\n", + "example" : [ + "multiqc_plots" + ], + "must_exist" : false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Modules and analyses to run", + "arguments" : [ + { + "type" : "string", + "name" : "--include_modules", + "description" : "Use only these module", + "example" : [ + "fastqc", + "cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--exclude_modules", + "description" : "Do not use only these modules", + "example" : [ + "fastqc", + "cutadapt" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ignore_analysis", + "example" : [ + "run_one/*", + "run_two/*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ignore_samples", + "example" : [ + "sample_1*", + "sample_3*" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--ignore_symlinks", + "description" : "Ignore symlinked directories and files", + "direction" : "input" + } + ] + }, + { + "name" : "Sample name handling", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--dirs", + "description" : "Prepend directory to sample names to avoid clashing filenames", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--dirs_depth", + "description" : "Prepend n directories to sample names. Negative number to take from start of path.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--full_names", + "description" : "Do not clean the sample names (leave as full file name)", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fn_as_s_name", + "description" : "Use the log filename as the sample name", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--replace_names", + "description" : "TSV file to rename sample names during report generation", + "example" : [ + "replace_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Report Customisation", + "arguments" : [ + { + "type" : "string", + "name" : "--title", + "description" : "Report title. Printed as page header, used for filename if not otherwise specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--comment", + "description" : "Custom comment, will be printed at the top of the report.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--template", + "description" : "Report template to use.\n", + "required" : false, + "choices" : [ + "default", + "gathered", + "geo", + "highcharts", + "sections", + "simple" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_names", + "description" : "TSV file containing alternative sample names for renaming buttons in the report.\n", + "example" : [ + "sample_names.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_filters", + "description" : "TSV file containing show/hide patterns for the report\n", + "example" : [ + "sample_filters.tsv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--custom_css_file", + "description" : "Custom CSS file to add to the final report\n", + "example" : [ + "custom_style_sheet.css" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--profile_runtime", + "description" : "Add analysis of how long MultiQC takes to run to the report\n", + "direction" : "input" + } + ] + }, + { + "name" : "MultiQC behaviour", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--verbose", + "description" : "Increase output verbosity.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "description" : "Only show log warnings\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--strict", + "description" : "Don't catch exceptions, run additional code checks to help development.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--development", + "description" : "Development mode. Do not compress and minimise JS, export uncompressed plot data.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--require_logs", + "description" : "Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_megaqc_upload", + "description" : "Don't upload generated report to MegaQC, even if MegaQC options are found.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_ansi", + "description" : "Disable coloured log output.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--cl_config", + "description" : "YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n", + "example" : [ + "qualimap_config: { general_stats_coverage: [20,40,200] }" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output format", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--flat", + "description" : "Use only flat plots (static images).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--interactive", + "description" : "Use only interactive plots (in-browser Javascript).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--data_dir", + "description" : "Force the parsed data directory to be created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_data_dir", + "description" : "Prevent the parsed data directory from being created.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--zip_data_dir", + "description" : "Compress the data directory.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--data_format", + "description" : "Output parsed data in a different format than the default 'txt'.\n", + "required" : false, + "choices" : [ + "tsv", + "csv", + "json", + "yaml" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--pdf", + "description" : "Creates PDF report with the 'simple' template. Requires Pandoc to be installed.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "info" : { + "keywords" : [ + "QC", + "html report", + "aggregate analysis" + ], + "links" : { + "homepage" : "https://multiqc.info/", + "documentation" : "https://multiqc.info/docs/", + "repository" : "https://github.com/MultiQC/MultiQC" + }, + "references" : { + "doi" : "10.1093/bioinformatics/btw354" + }, + "licence" : "GPL v3 or later" + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/biobox" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "v0.3.1", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "multiqc --version | sed 's/multiqc, version\\\\s\\\\(.*\\\\)/multiqc: \\"\\\\1\\"/' > /var/software_versions.txt\n" + ] + } + ], + "test_setup" : [ + { + "type" : "apt", + "packages" : [ + "jq" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/multiqc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/multiqc", + "viash_version" : "0.9.4", + "git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.3.0-8-g98a5f3c" + }, + "package_config" : { + "name" : "biobox", + "version" : "v0.3.1", + "summary" : "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n", + "description" : "`biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for components and parameters.\n * Full exposure of underlying tool arguments.\n * Containerized (Docker) for dependency management and reproducibility.\n * Unit tested for verified functionality.\n", + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'v0.3.1'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_REPORT+x} ]; then echo "${VIASH_PAR_OUTPUT_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_report='&'#" ; else echo "# par_output_report="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DATA+x} ]; then echo "${VIASH_PAR_OUTPUT_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_data='&'#" ; else echo "# par_output_data="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOTS+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_plots='&'#" ; else echo "# par_output_plots="; fi ) +$( if [ ! -z ${VIASH_PAR_INCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_INCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_include_modules='&'#" ; else echo "# par_include_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE_MODULES+x} ]; then echo "${VIASH_PAR_EXCLUDE_MODULES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude_modules='&'#" ; else echo "# par_exclude_modules="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_ANALYSIS+x} ]; then echo "${VIASH_PAR_IGNORE_ANALYSIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_analysis='&'#" ; else echo "# par_ignore_analysis="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SAMPLES+x} ]; then echo "${VIASH_PAR_IGNORE_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_samples='&'#" ; else echo "# par_ignore_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_IGNORE_SYMLINKS+x} ]; then echo "${VIASH_PAR_IGNORE_SYMLINKS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_symlinks='&'#" ; else echo "# par_ignore_symlinks="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS+x} ]; then echo "${VIASH_PAR_DIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs='&'#" ; else echo "# par_dirs="; fi ) +$( if [ ! -z ${VIASH_PAR_DIRS_DEPTH+x} ]; then echo "${VIASH_PAR_DIRS_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_dirs_depth='&'#" ; else echo "# par_dirs_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_NAMES+x} ]; then echo "${VIASH_PAR_FULL_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_names='&'#" ; else echo "# par_full_names="; fi ) +$( if [ ! -z ${VIASH_PAR_FN_AS_S_NAME+x} ]; then echo "${VIASH_PAR_FN_AS_S_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fn_as_s_name='&'#" ; else echo "# par_fn_as_s_name="; fi ) +$( if [ ! -z ${VIASH_PAR_REPLACE_NAMES+x} ]; then echo "${VIASH_PAR_REPLACE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_replace_names='&'#" ; else echo "# par_replace_names="; fi ) +$( if [ ! -z ${VIASH_PAR_TITLE+x} ]; then echo "${VIASH_PAR_TITLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_title='&'#" ; else echo "# par_title="; fi ) +$( if [ ! -z ${VIASH_PAR_COMMENT+x} ]; then echo "${VIASH_PAR_COMMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_comment='&'#" ; else echo "# par_comment="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPLATE+x} ]; then echo "${VIASH_PAR_TEMPLATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_template='&'#" ; else echo "# par_template="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_NAMES+x} ]; then echo "${VIASH_PAR_SAMPLE_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_names='&'#" ; else echo "# par_sample_names="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_FILTERS+x} ]; then echo "${VIASH_PAR_SAMPLE_FILTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_filters='&'#" ; else echo "# par_sample_filters="; fi ) +$( if [ ! -z ${VIASH_PAR_CUSTOM_CSS_FILE+x} ]; then echo "${VIASH_PAR_CUSTOM_CSS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_custom_css_file='&'#" ; else echo "# par_custom_css_file="; fi ) +$( if [ ! -z ${VIASH_PAR_PROFILE_RUNTIME+x} ]; then echo "${VIASH_PAR_PROFILE_RUNTIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_profile_runtime='&'#" ; else echo "# par_profile_runtime="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_STRICT+x} ]; then echo "${VIASH_PAR_STRICT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strict='&'#" ; else echo "# par_strict="; fi ) +$( if [ ! -z ${VIASH_PAR_DEVELOPMENT+x} ]; then echo "${VIASH_PAR_DEVELOPMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_development='&'#" ; else echo "# par_development="; fi ) +$( if [ ! -z ${VIASH_PAR_REQUIRE_LOGS+x} ]; then echo "${VIASH_PAR_REQUIRE_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_require_logs='&'#" ; else echo "# par_require_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_MEGAQC_UPLOAD+x} ]; then echo "${VIASH_PAR_NO_MEGAQC_UPLOAD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_megaqc_upload='&'#" ; else echo "# par_no_megaqc_upload="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_ANSI+x} ]; then echo "${VIASH_PAR_NO_ANSI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_ansi='&'#" ; else echo "# par_no_ansi="; fi ) +$( if [ ! -z ${VIASH_PAR_CL_CONFIG+x} ]; then echo "${VIASH_PAR_CL_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cl_config='&'#" ; else echo "# par_cl_config="; fi ) +$( if [ ! -z ${VIASH_PAR_FLAT+x} ]; then echo "${VIASH_PAR_FLAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_flat='&'#" ; else echo "# par_flat="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERACTIVE+x} ]; then echo "${VIASH_PAR_INTERACTIVE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interactive='&'#" ; else echo "# par_interactive="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_DIR+x} ]; then echo "${VIASH_PAR_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_dir='&'#" ; else echo "# par_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_DATA_DIR+x} ]; then echo "${VIASH_PAR_NO_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_data_dir='&'#" ; else echo "# par_no_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP_DATA_DIR+x} ]; then echo "${VIASH_PAR_ZIP_DATA_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip_data_dir='&'#" ; else echo "# par_zip_data_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_DATA_FORMAT+x} ]; then echo "${VIASH_PAR_DATA_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_format='&'#" ; else echo "# par_data_format="; fi ) +$( if [ ! -z ${VIASH_PAR_PDF+x} ]; then echo "${VIASH_PAR_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pdf='&'#" ; else echo "# par_pdf="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +# disable flags +unset_if_false=( + par_ignore_symlinks + par_dirs + par_full_names + par_fn_as_s_name + par_profile_runtime + par_verbose + par_quiet + par_strict + par_development + par_require_logs + par_no_megaqc_upload + par_no_ansi + par_flat + par_interactive + par_static_plot_export + par_data_dir + par_no_data_dir + par_zip_data_dir + par_pdf +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +# handle inputs +out_dir=\\$(dirname "\\$par_output_report") +output_report_file=\\$(basename "\\$par_output_report") +report_name="\\${output_report_file%.*}" + +# handle outputs +[[ -z "\\$par_output_report" ]] && no_report=true +[[ -z "\\$par_output_data" ]] && no_data_dir=true +[[ ! -z "\\$par_output_data" ]] && data_dir=true +[[ ! -z "\\$par_output_plots" ]] && export=true + +# handle multiples +IFS=";" read -ra inputs <<< \\$par_input + +if [[ -n "\\$par_include_modules" ]]; then + include_modules="" + IFS=";" read -ra incl_modules <<< \\$par_include_modules + for i in "\\${incl_modules[@]}"; do + include_modules+="--include \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_exclude_modules" ]]; then + exclude_modules="" + IFS=";" read -ra excl_modules <<< \\$par_exclude_modules + for i in "\\${excl_modules[@]}"; do + exclude_modules+="--exclude \\$i" + done + unset IFS +fi + +if [[ -n "\\$par_ignore_analysis" ]]; then + ignore="" + IFS=";" read -ra ignore_analysis <<< \\$par_ignore_analysis + for i in "\\${ignore_analysis[@]}"; do + ignore+="--ignore \\$i " + done + unset IFS +fi + +if [[ -n "\\$par_ignore_samples" ]]; then + ignore_samples="" + IFS=";" read -ra ign_samples <<< \\$par_ignore_samples + for i in "\\${ign_samples[@]}"; do + ignore_samples+="--ignore-samples \\$i" + done + unset IFS +fi + +# run multiqc +multiqc \\\\ + \\${par_output_report:+--filename "\\$report_name"} \\\\ + \\${out_dir:+--outdir "\\$out_dir"} \\\\ + \\${no_report:+--no-report} \\\\ + \\${no_data_dir:+--no-data-dir} \\\\ + \\${data_dir:+--data-dir} \\\\ + \\${export:+--export} \\\\ + \\${par_title:+--title "\\$par_title"} \\\\ + \\${par_comment:+--comment "\\$par_comment"} \\\\ + \\${par_template:+--template "\\$par_template"} \\\\ + \\${par_sample_names:+--sample-names "\\$par_sample_names"} \\\\ + \\${par_sample_filters:+--sample-filters "\\$par_sample_filters"} \\\\ + \\${par_custom_css_file:+--custom-css-file "\\$par_custom_css_file"} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_dirs:+--dirs} \\\\ + \\${par_dirs_depth:+--dirs-depth "\\$par_dirs_depth"} \\\\ + \\${par_full_names:+--full-names} \\\\ + \\${par_fn_as_s_name:+--fn-as-s-name} \\\\ + \\${par_ignore_names:+--ignore-names "\\$par_ignore_names"} \\\\ + \\${par_ignore_symlinks:+--ignore-symlinks} \\\\ + \\${ignore_samples} \\\\ + \\${ignore} \\\\ + \\${exclude_modules} \\\\ + \\${include_modules} \\\\ + \\${par_include_modules:+--include-modules "\\$par_include_modules"} \\\\ + \\${par_data_format:+--data-format "\\$par_data_format"} \\\\ + \\${par_cl_config:+--cl-config "\\$par_cl_config"} \\\\ + \\${par_zip_data_dir:+--zip-data-dir} \\\\ + \\${par_pdf:+--pdf} \\\\ + \\${par_interactive:+--interactive} \\\\ + \\${par_flat:+--flat} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${par_strict:+--strict} \\\\ + \\${par_no_megaqc_upload:+--no-megaqc-upload} \\\\ + \\${par_no_ansi:+--no-ansi} \\\\ + \\${par_profile_runtime:+--profile-runtime} \\\\ + \\${par_require_logs:+--require-logs} \\\\ + \\${par_development:+--development} \\\\ + --force \\\\ + "\\${inputs[@]}" + +# Move outputs + +if [[ -n "\\$par_output_data" ]] && [[ -d "\\${out_dir}/\\${report_name}_data" ]]; then + mv "\\${out_dir}/\\${report_name}_data" "\\$par_output_data" +elif [[ -n "\\$par_output_data" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_data" ]]; then + echo "WARNING: Data could not be saved because data folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi + +if [[ -n "\\$par_output_plots" ]] && [[ -d "\\${out_dir}/\\${report_name}_plots" ]]; then + mv "\\${out_dir}/\\${report_name}_plots" "\\$par_output_plots" +elif [[ -n "\\$par_output_plots" ]] && [[ ! -d "\\${out_dir}/\\${report_name}_plots" ]]; then + echo "WARNING: Plots could not be saved because plots folder was not generated by multiqc. This could be due to filtering out of modules or samples." +fi +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/multiqc", + "tag" : "v0.3.1" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config new file mode 100644 index 0000000..3efa32d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'multiqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'v0.3.1' + description = 'MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\'s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n' + author = 'Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json new file mode 100644 index 0000000..04c0a3b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/nextflow_schema.json @@ -0,0 +1,529 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "multiqc", +"description": "MultiQC aggregates results from bioinformatics analyses across many samples into a single report.\nIt searches a given directory for analysis logs and compiles a HTML report. It\u0027s a general use tool, perfect for summarising the output from numerous bioinformatics tools.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `data/results`, multiple_sep: `\";\"`. File paths to be searched for analysis results to be included in the report", + "help_text": "Type: List of `file`, required, example: `data/results`, multiple_sep: `\";\"`. File paths to be searched for analysis results to be included in the report.\n" + + } + + +} +}, + + + "ouput" : { + "title": "Ouput", + "type": "object", + "description": "No description", + "properties": { + + + "output_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report", + "help_text": "Type: `file`, default: `$id.$key.output_report.html`, example: `multiqc_report.html`. Filepath of the generated report.\n" + , + "default":"$id.$key.output_report.html" + } + + + , + "output_data": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files", + "help_text": "Type: `file`, default: `$id.$key.output_data`, example: `multiqc_data`. Output directory for parsed data files. If not provided, parsed data will not be published.\n" + , + "default":"$id.$key.output_data" + } + + + , + "output_plots": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots", + "help_text": "Type: `file`, default: `$id.$key.output_plots`, example: `multiqc_plots`. Output directory for generated plots. If not provided, plots will not be published.\n" + , + "default":"$id.$key.output_plots" + } + + +} +}, + + + "modules and analyses to run" : { + "title": "Modules and analyses to run", + "type": "object", + "description": "No description", + "properties": { + + + "include_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Use only these module", + "help_text": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Use only these module" + + } + + + , + "exclude_modules": { + "type": + "string", + "description": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Do not use only these modules", + "help_text": "Type: List of `string`, example: `fastqc;cutadapt`, multiple_sep: `\";\"`. Do not use only these modules" + + } + + + , + "ignore_analysis": { + "type": + "string", + "description": "Type: List of `string`, example: `run_one/*;run_two/*`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `string`, example: `run_one/*;run_two/*`, multiple_sep: `\";\"`. " + + } + + + , + "ignore_samples": { + "type": + "string", + "description": "Type: List of `string`, example: `sample_1*;sample_3*`, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `string`, example: `sample_1*;sample_3*`, multiple_sep: `\";\"`. " + + } + + + , + "ignore_symlinks": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files", + "help_text": "Type: `boolean_true`, default: `false`. Ignore symlinked directories and files" + , + "default":false + } + + +} +}, + + + "sample name handling" : { + "title": "Sample name handling", + "type": "object", + "description": "No description", + "properties": { + + + "dirs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames", + "help_text": "Type: `boolean_true`, default: `false`. Prepend directory to sample names to avoid clashing filenames" + , + "default":false + } + + + , + "dirs_depth": { + "type": + "integer", + "description": "Type: `integer`. Prepend n directories to sample names", + "help_text": "Type: `integer`. Prepend n directories to sample names. Negative number to take from start of path." + + } + + + , + "full_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)", + "help_text": "Type: `boolean_true`, default: `false`. Do not clean the sample names (leave as full file name)" + , + "default":false + } + + + , + "fn_as_s_name": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name", + "help_text": "Type: `boolean_true`, default: `false`. Use the log filename as the sample name" + , + "default":false + } + + + , + "replace_names": { + "type": + "string", + "description": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation", + "help_text": "Type: `file`, example: `replace_names.tsv`. TSV file to rename sample names during report generation" + + } + + +} +}, + + + "report customisation" : { + "title": "Report Customisation", + "type": "object", + "description": "No description", + "properties": { + + + "title": { + "type": + "string", + "description": "Type: `string`. Report title", + "help_text": "Type: `string`. Report title. Printed as page header, used for filename if not otherwise specified.\n" + + } + + + , + "comment": { + "type": + "string", + "description": "Type: `string`. Custom comment, will be printed at the top of the report", + "help_text": "Type: `string`. Custom comment, will be printed at the top of the report.\n" + + } + + + , + "template": { + "type": + "string", + "description": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use", + "help_text": "Type: `string`, choices: ``default`, `gathered`, `geo`, `highcharts`, `sections`, `simple``. Report template to use.\n", + "enum": ["default", "gathered", "geo", "highcharts", "sections", "simple"] + + + } + + + , + "sample_names": { + "type": + "string", + "description": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report", + "help_text": "Type: `file`, example: `sample_names.tsv`. TSV file containing alternative sample names for renaming buttons in the report.\n" + + } + + + , + "sample_filters": { + "type": + "string", + "description": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n", + "help_text": "Type: `file`, example: `sample_filters.tsv`. TSV file containing show/hide patterns for the report\n" + + } + + + , + "custom_css_file": { + "type": + "string", + "description": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n", + "help_text": "Type: `file`, example: `custom_style_sheet.css`. Custom CSS file to add to the final report\n" + + } + + + , + "profile_runtime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n", + "help_text": "Type: `boolean_true`, default: `false`. Add analysis of how long MultiQC takes to run to the report\n" + , + "default":false + } + + +} +}, + + + "multiqc behaviour" : { + "title": "MultiQC behaviour", + "type": "object", + "description": "No description", + "properties": { + + + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Increase output verbosity", + "help_text": "Type: `boolean_true`, default: `false`. Increase output verbosity.\n" + , + "default":false + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Only show log warnings\n", + "help_text": "Type: `boolean_true`, default: `false`. Only show log warnings\n" + , + "default":false + } + + + , + "strict": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t catch exceptions, run additional code checks to help development.\n" + , + "default":false + } + + + , + "development": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Development mode", + "help_text": "Type: `boolean_true`, default: `false`. Development mode. Do not compress and minimise JS, export uncompressed plot data.\n" + , + "default":false + } + + + , + "require_logs": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files", + "help_text": "Type: `boolean_true`, default: `false`. Require all explicitly requested modules to have log files. If not, MultiQC will exit with an error.\n" + , + "default":false + } + + + , + "no_megaqc_upload": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found", + "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t upload generated report to MegaQC, even if MegaQC options are found.\n" + , + "default":false + } + + + , + "no_ansi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable coloured log output", + "help_text": "Type: `boolean_true`, default: `false`. Disable coloured log output.\n" + , + "default":false + } + + + , + "cl_config": { + "type": + "string", + "description": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection", + "help_text": "Type: `string`, example: `qualimap_config: { general_stats_coverage: [20,40,200] }`. YAML formatted string that allows to customize MultiQC behaviour like input file detection.\n" + + } + + +} +}, + + + "output format" : { + "title": "Output format", + "type": "object", + "description": "No description", + "properties": { + + + "flat": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only flat plots (static images)", + "help_text": "Type: `boolean_true`, default: `false`. Use only flat plots (static images).\n" + , + "default":false + } + + + , + "interactive": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript)", + "help_text": "Type: `boolean_true`, default: `false`. Use only interactive plots (in-browser Javascript).\n" + , + "default":false + } + + + , + "data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created", + "help_text": "Type: `boolean_true`, default: `false`. Force the parsed data directory to be created.\n" + , + "default":false + } + + + , + "no_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created", + "help_text": "Type: `boolean_true`, default: `false`. Prevent the parsed data directory from being created.\n" + , + "default":false + } + + + , + "zip_data_dir": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Compress the data directory", + "help_text": "Type: `boolean_true`, default: `false`. Compress the data directory.\n" + , + "default":false + } + + + , + "data_format": { + "type": + "string", + "description": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027", + "help_text": "Type: `string`, choices: ``tsv`, `csv`, `json`, `yaml``. Output parsed data in a different format than the default \u0027txt\u0027.\n", + "enum": ["tsv", "csv", "json", "yaml"] + + + } + + + , + "pdf": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template", + "help_text": "Type: `boolean_true`, default: `false`. Creates PDF report with the \u0027simple\u0027 template. Requires Pandoc to be installed.\n" + , + "default":false + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/ouput" + }, + + { + "$ref": "#/definitions/modules and analyses to run" + }, + + { + "$ref": "#/definitions/sample name handling" + }, + + { + "$ref": "#/definitions/report customisation" + }, + + { + "$ref": "#/definitions/multiqc behaviour" + }, + + { + "$ref": "#/definitions/output format" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/executable/io/interop_summary_to_csv/.config.vsh.yaml b/target/executable/io/interop_summary_to_csv/.config.vsh.yaml new file mode 100644 index 0000000..33aa6af --- /dev/null +++ b/target/executable/io/interop_summary_to_csv/.config.vsh.yaml @@ -0,0 +1,191 @@ +name: "interop_summary_to_csv" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Sequencing run folder (*not* InterOp folder)." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output_run_summary" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_index_summary" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "iseq-DI" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "summary" + - "index-summary" + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "wget" + interactive: false + - type: "docker" + run: + - "wget https://github.com/Illumina/interop/releases/download/v1.3.1/interop-1.3.1-Linux-GNU.tar.gz\ + \ -O /tmp/interop.tar.gz && \\\ntar -C /tmp/ --no-same-owner --no-same-permissions\ + \ -xvf /tmp/interop.tar.gz && \\\nmv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary\ + \ /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/interop_summary_to_csv/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/io/interop_summary_to_csv" + executable: "target/executable/io/interop_summary_to_csv/interop_summary_to_csv" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/executable/io/interop_summary_to_csv/interop_summary_to_csv b/target/executable/io/interop_summary_to_csv/interop_summary_to_csv new file mode 100755 index 0000000..2ac0da1 --- /dev/null +++ b/target/executable/io/interop_summary_to_csv/interop_summary_to_csv @@ -0,0 +1,1144 @@ +#!/usr/bin/env bash + +# interop_summary_to_csv move-detect-demultiplex-subworkflow +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="interop_summary_to_csv" +VIASH_META_FUNCTIONALITY_NAME="interop_summary_to_csv" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM debian:stable-slim +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps wget && \ + rm -rf /var/lib/apt/lists/* + +RUN wget https://github.com/Illumina/interop/releases/download/v1.3.1/interop-1.3.1-Linux-GNU.tar.gz -O /tmp/interop.tar.gz && \ +tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \ +mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/ + +LABEL org.opencontainers.image.description="Companion container for running component io interop_summary_to_csv" +LABEL org.opencontainers.image.created="2025-05-27T14:23:50Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" +LABEL org.opencontainers.image.revision="84bb47bfb87cd0420f52ec97c15236311de4c94c" +LABEL org.opencontainers.image.version="move-detect-demultiplex-subworkflow" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "interop_summary_to_csv move-detect-demultiplex-subworkflow" + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Sequencing run folder (*not* InterOp folder)." + echo "" + echo "Output arguments:" + echo " --output_run_summary" + echo " type: file, required parameter, output, file must exist" + echo "" + echo " --output_index_summary" + echo " type: file, required parameter, output, file must exist" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "interop_summary_to_csv move-detect-demultiplex-subworkflow" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_run_summary) + [ -n "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ] && ViashError Bad arguments for option \'--output_run_summary\': \'$VIASH_PAR_OUTPUT_RUN_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_RUN_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_run_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_run_summary=*) + [ -n "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ] && ViashError Bad arguments for option \'--output_run_summary=*\': \'$VIASH_PAR_OUTPUT_RUN_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_RUN_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_index_summary) + [ -n "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ] && ViashError Bad arguments for option \'--output_index_summary\': \'$VIASH_PAR_OUTPUT_INDEX_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_INDEX_SUMMARY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_index_summary. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_index_summary=*) + [ -n "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ] && ViashError Bad arguments for option \'--output_index_summary=*\': \'$VIASH_PAR_OUTPUT_INDEX_SUMMARY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_INDEX_SUMMARY=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/demultiplex/io/interop_summary_to_csv:move-detect-demultiplex-subworkflow' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'summary' 'index-summary' 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'summary' 'index-summary' 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_RUN_SUMMARY+x} ]; then + ViashError '--output_run_summary' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT_INDEX_SUMMARY+x} ]; then + ViashError '--output_index_summary' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_RUN_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_RUN_SUMMARY")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_INDEX_SUMMARY")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_INDEX_SUMMARY")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_RUN_SUMMARY")" ) + VIASH_PAR_OUTPUT_RUN_SUMMARY=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_RUN_SUMMARY") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_INDEX_SUMMARY")" ) + VIASH_PAR_OUTPUT_INDEX_SUMMARY=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_INDEX_SUMMARY") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-interop_summary_to_csv-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_SUMMARY}" | sed "s#'#'\"'\"'#g;s#.*#par_output_run_summary='&'#" ; else echo "# par_output_run_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_INDEX_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_INDEX_SUMMARY}" | sed "s#'#'\"'\"'#g;s#.*#par_output_index_summary='&'#" ; else echo "# par_output_index_summary="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/usr/bin/env bash + +set -eo pipefail + +if [ ! -d "\$par_input" ]; then + echo "Input directory does not exist or is not a directory" + exit 1 +fi +\$(which summary) --csv=1 "\$par_input" 1> "\$par_output_run_summary" +\$(which index-summary) --csv=1 "\$par_input" 1> "\$par_output_index_summary" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ]; then + VIASH_PAR_OUTPUT_RUN_SUMMARY=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_RUN_SUMMARY") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ]; then + VIASH_PAR_OUTPUT_INDEX_SUMMARY=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_INDEX_SUMMARY") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ] && [ ! -e "$VIASH_PAR_OUTPUT_RUN_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_RUN_SUMMARY' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ] && [ ! -e "$VIASH_PAR_OUTPUT_INDEX_SUMMARY" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_INDEX_SUMMARY' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/io/interop_summary_to_csv/nextflow_labels.config b/target/executable/io/interop_summary_to_csv/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/executable/io/interop_summary_to_csv/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/executable/io/publish/.config.vsh.yaml b/target/executable/io/publish/.config.vsh.yaml new file mode 100644 index 0000000..4fc3625 --- /dev/null +++ b/target/executable/io/publish/.config.vsh.yaml @@ -0,0 +1,253 @@ +name: "publish" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Directory to write fastq data to" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_falco" + description: "Directory to write falco output to" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--input_multiqc" + description: "Location where to write the MultiQC report to." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_run_information" + description: "Location where to write the run information to." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_demultiplexer_logs" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_falco" + info: null + default: + - "qc/fastqc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_multiqc" + info: null + default: + - "qc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_run_information" + info: null + default: + - "run_information.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_demultiplexer_logs" + info: null + default: + - "demultiplexer_logs" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "code.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Publish the processed results of the run" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/publish/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/io/publish" + executable: "target/executable/io/publish/publish" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/executable/io/publish/nextflow_labels.config b/target/executable/io/publish/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/executable/io/publish/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/executable/io/publish/publish b/target/executable/io/publish/publish new file mode 100755 index 0000000..043163e --- /dev/null +++ b/target/executable/io/publish/publish @@ -0,0 +1,1424 @@ +#!/usr/bin/env bash + +# publish move-detect-demultiplex-subworkflow +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="publish" +VIASH_META_FUNCTIONALITY_NAME="publish" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM debian:stable-slim +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.description="Companion container for running component io publish" +LABEL org.opencontainers.image.created="2025-05-27T14:23:50Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" +LABEL org.opencontainers.image.revision="84bb47bfb87cd0420f52ec97c15236311de4c94c" +LABEL org.opencontainers.image.version="move-detect-demultiplex-subworkflow" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "publish move-detect-demultiplex-subworkflow" + echo "" + echo "Publish the processed results of the run" + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Directory to write fastq data to" + echo "" + echo " --input_falco" + echo " type: file, required parameter, multiple values allowed, file must exist" + echo " Directory to write falco output to" + echo "" + echo " --input_multiqc" + echo " type: file, required parameter, file must exist" + echo " Location where to write the MultiQC report to." + echo "" + echo " --input_run_information" + echo " type: file, required parameter, file must exist" + echo " Location where to write the run information to." + echo "" + echo " --input_demultiplexer_logs" + echo " type: file, required parameter, file must exist" + echo "" + echo "Output arguments:" + echo " --output" + echo " type: file, output, file must exist" + echo " default: fastq" + echo "" + echo " --output_falco" + echo " type: file, output, file must exist" + echo " default: qc/fastqc" + echo "" + echo " --output_multiqc" + echo " type: file, output, file must exist" + echo " default: qc/multiqc_report.html" + echo "" + echo " --output_run_information" + echo " type: file, output, file must exist" + echo " default: run_information.csv" + echo "" + echo " --output_demultiplexer_logs" + echo " type: file, output, file must exist" + echo " default: demultiplexer_logs" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "publish move-detect-demultiplex-subworkflow" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_falco) + if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then + VIASH_PAR_INPUT_FALCO="$2" + else + VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;""$2" + fi + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_falco. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_falco=*) + if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then + VIASH_PAR_INPUT_FALCO=$(ViashRemoveFlags "$1") + else + VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;"$(ViashRemoveFlags "$1") + fi + shift 1 + ;; + --input_multiqc) + [ -n "$VIASH_PAR_INPUT_MULTIQC" ] && ViashError Bad arguments for option \'--input_multiqc\': \'$VIASH_PAR_INPUT_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_multiqc=*) + [ -n "$VIASH_PAR_INPUT_MULTIQC" ] && ViashError Bad arguments for option \'--input_multiqc=*\': \'$VIASH_PAR_INPUT_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_run_information) + [ -n "$VIASH_PAR_INPUT_RUN_INFORMATION" ] && ViashError Bad arguments for option \'--input_run_information\': \'$VIASH_PAR_INPUT_RUN_INFORMATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_RUN_INFORMATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_run_information. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_run_information=*) + [ -n "$VIASH_PAR_INPUT_RUN_INFORMATION" ] && ViashError Bad arguments for option \'--input_run_information=*\': \'$VIASH_PAR_INPUT_RUN_INFORMATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_RUN_INFORMATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_demultiplexer_logs) + [ -n "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ] && ViashError Bad arguments for option \'--input_demultiplexer_logs\': \'$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_demultiplexer_logs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_demultiplexer_logs=*) + [ -n "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ] && ViashError Bad arguments for option \'--input_demultiplexer_logs=*\': \'$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_falco) + [ -n "$VIASH_PAR_OUTPUT_FALCO" ] && ViashError Bad arguments for option \'--output_falco\': \'$VIASH_PAR_OUTPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FALCO="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_falco. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_falco=*) + [ -n "$VIASH_PAR_OUTPUT_FALCO" ] && ViashError Bad arguments for option \'--output_falco=*\': \'$VIASH_PAR_OUTPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_FALCO=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_multiqc) + [ -n "$VIASH_PAR_OUTPUT_MULTIQC" ] && ViashError Bad arguments for option \'--output_multiqc\': \'$VIASH_PAR_OUTPUT_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MULTIQC="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_multiqc. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_multiqc=*) + [ -n "$VIASH_PAR_OUTPUT_MULTIQC" ] && ViashError Bad arguments for option \'--output_multiqc=*\': \'$VIASH_PAR_OUTPUT_MULTIQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MULTIQC=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_run_information) + [ -n "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ] && ViashError Bad arguments for option \'--output_run_information\': \'$VIASH_PAR_OUTPUT_RUN_INFORMATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_RUN_INFORMATION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_run_information. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_run_information=*) + [ -n "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ] && ViashError Bad arguments for option \'--output_run_information=*\': \'$VIASH_PAR_OUTPUT_RUN_INFORMATION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_RUN_INFORMATION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_demultiplexer_logs) + [ -n "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ] && ViashError Bad arguments for option \'--output_demultiplexer_logs\': \'$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_demultiplexer_logs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_demultiplexer_logs=*) + [ -n "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ] && ViashError Bad arguments for option \'--output_demultiplexer_logs=*\': \'$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/demultiplex/io/publish:move-detect-demultiplex-subworkflow' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_FALCO+x} ]; then + ViashError '--input_falco' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then + ViashError '--input_multiqc' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_RUN_INFORMATION+x} ]; then + ViashError '--input_run_information' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS+x} ]; then + ViashError '--input_demultiplexer_logs' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + VIASH_PAR_OUTPUT="fastq" +fi +if [ -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then + VIASH_PAR_OUTPUT_FALCO="qc/fastqc" +fi +if [ -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then + VIASH_PAR_OUTPUT_MULTIQC="qc/multiqc_report.html" +fi +if [ -z ${VIASH_PAR_OUTPUT_RUN_INFORMATION+x} ]; then + VIASH_PAR_OUTPUT_RUN_INFORMATION="run_information.csv" +fi +if [ -z ${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS+x} ]; then + VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS="demultiplexer_logs" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_INPUT_FALCO; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f +fi +if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ] && [ ! -e "$VIASH_PAR_INPUT_MULTIQC" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_RUN_INFORMATION" ] && [ ! -e "$VIASH_PAR_INPUT_RUN_INFORMATION" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_RUN_INFORMATION' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ] && [ ! -e "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FALCO")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FALCO")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_MULTIQC")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_MULTIQC")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_RUN_INFORMATION")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_RUN_INFORMATION")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then + VIASH_TEST_INPUT_FALCO=() + IFS=';' + for var in $VIASH_PAR_INPUT_FALCO; do + unset IFS + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) + var=$(ViashDockerAutodetectMount "$var") + VIASH_TEST_INPUT_FALCO+=( "$var" ) + done + VIASH_PAR_INPUT_FALCO=$(IFS=';' ; echo "${VIASH_TEST_INPUT_FALCO[*]}") +fi +if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_MULTIQC")" ) + VIASH_PAR_INPUT_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_MULTIQC") +fi +if [ ! -z "$VIASH_PAR_INPUT_RUN_INFORMATION" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_RUN_INFORMATION")" ) + VIASH_PAR_INPUT_RUN_INFORMATION=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_RUN_INFORMATION") +fi +if [ ! -z "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS")" ) + VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_FALCO")" ) + VIASH_PAR_OUTPUT_FALCO=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_FALCO") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_FALCO" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_MULTIQC")" ) + VIASH_PAR_OUTPUT_MULTIQC=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_MULTIQC") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_MULTIQC" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_RUN_INFORMATION")" ) + VIASH_PAR_OUTPUT_RUN_INFORMATION=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_RUN_INFORMATION") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS")" ) + VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-publish-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FALCO+x} ]; then echo "${VIASH_PAR_INPUT_FALCO}" | sed "s#'#'\"'\"'#g;s#.*#par_input_falco='&'#" ; else echo "# par_input_falco="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_INPUT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_input_multiqc='&'#" ; else echo "# par_input_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_INPUT_RUN_INFORMATION}" | sed "s#'#'\"'\"'#g;s#.*#par_input_run_information='&'#" ; else echo "# par_input_run_information="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_input_demultiplexer_logs='&'#" ; else echo "# par_input_demultiplexer_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then echo "${VIASH_PAR_OUTPUT_FALCO}" | sed "s#'#'\"'\"'#g;s#.*#par_output_falco='&'#" ; else echo "# par_output_falco="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_OUTPUT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_output_multiqc='&'#" ; else echo "# par_output_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_INFORMATION}" | sed "s#'#'\"'\"'#g;s#.*#par_output_run_information='&'#" ; else echo "# par_output_run_information="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_output_demultiplexer_logs='&'#" ; else echo "# par_output_demultiplexer_logs="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +declare -A input_output_mapping=(["par_input"]="par_output" + ["par_input_multiqc"]="par_output_multiqc" + ["par_input_run_information"]="par_output_run_information" + ["par_input_demultiplexer_logs"]="par_output_demultiplexer_logs" + ) + +for input_argument_name in "\${!input_output_mapping[@]}" +do + input_location="\${!input_argument_name}" + output_argument_name="\${input_output_mapping[\$input_argument_name]}" + output_location="\${!output_argument_name}" + echo "Publishing \$input_location -> \$output_location" + + echo "Creating directory if it does not exist." + mkdir -p \$(dirname "\$output_location") && echo "Containing directory \$output_location created" + + echo "Copying files..." + cp -rL "\$input_location" "\$output_location" + + echo "Output files for \$output_location:" + ls "\$output_location" +done + +echo "Grouping output from \$par_input_falco into \$par_output_falco" +mkdir -p "\$par_output_falco" +IFS=";" read -ra falco_inputs <<< \$par_input_falco +for falco_dir in "\${falco_inputs[@]}"; do + echo "Copying contents of \$falco_dir" + find -H -D exec "\$falco_dir" -type f -maxdepth 1 -exec cp -t "\$par_output_falco" {} + +done +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then + unset VIASH_TEST_INPUT_FALCO + IFS=';' + for var in $VIASH_PAR_INPUT_FALCO; do + unset IFS + if [ -z "$VIASH_TEST_INPUT_FALCO" ]; then + VIASH_TEST_INPUT_FALCO="$(ViashDockerStripAutomount "$var")" + else + VIASH_TEST_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO;""$(ViashDockerStripAutomount "$var")" + fi + done + VIASH_PAR_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO" + fi + if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then + VIASH_PAR_INPUT_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_INPUT_RUN_INFORMATION" ]; then + VIASH_PAR_INPUT_RUN_INFORMATION=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_RUN_INFORMATION") + fi + if [ ! -z "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS" ]; then + VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ]; then + VIASH_PAR_OUTPUT_FALCO=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_FALCO") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ]; then + VIASH_PAR_OUTPUT_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_MULTIQC") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ]; then + VIASH_PAR_OUTPUT_RUN_INFORMATION=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_RUN_INFORMATION") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ]; then + VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ] && [ ! -e "$VIASH_PAR_OUTPUT_FALCO" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_FALCO' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ] && [ ! -e "$VIASH_PAR_OUTPUT_MULTIQC" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_MULTIQC' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ] && [ ! -e "$VIASH_PAR_OUTPUT_RUN_INFORMATION" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_RUN_INFORMATION' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ] && [ ! -e "$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/io/untar/.config.vsh.yaml b/target/executable/io/untar/.config.vsh.yaml new file mode 100644 index 0000000..b4a2795 --- /dev/null +++ b/target/executable/io/untar/.config.vsh.yaml @@ -0,0 +1,190 @@ +name: "untar" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Tarball file to be unpacked." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + description: "Directory to write the contents of the .tar file to." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other arguments" + arguments: + - type: "string" + name: "--exclude" + alternatives: + - "-e" + description: "Prevents any file or member whose name matches the shell wildcard\ + \ (pattern) from being extracted." + info: null + example: + - "docs/figures" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Unpack a .tar file. When the contents of the .tar file is just a single\ + \ directory,\nput the contents of the directory into the output folder instead of\ + \ that directory.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/untar/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/io/untar" + executable: "target/executable/io/untar/untar" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/executable/io/untar/nextflow_labels.config b/target/executable/io/untar/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/executable/io/untar/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/executable/io/untar/untar b/target/executable/io/untar/untar new file mode 100755 index 0000000..3989d5e --- /dev/null +++ b/target/executable/io/untar/untar @@ -0,0 +1,1167 @@ +#!/usr/bin/env bash + +# untar move-detect-demultiplex-subworkflow +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="untar" +VIASH_META_FUNCTIONALITY_NAME="untar" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? : whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM debian:stable-slim +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \ + rm -rf /var/lib/apt/lists/* + +LABEL org.opencontainers.image.description="Companion container for running component io untar" +LABEL org.opencontainers.image.created="2025-05-27T14:23:50Z" +LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex" +LABEL org.opencontainers.image.revision="84bb47bfb87cd0420f52ec97c15236311de4c94c" +LABEL org.opencontainers.image.version="move-detect-demultiplex-subworkflow" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "untar move-detect-demultiplex-subworkflow" + echo "" + echo "Unpack a .tar file. When the contents of the .tar file is just a single" + echo "directory," + echo "put the contents of the directory into the output folder instead of that" + echo "directory." + echo "" + echo "Input arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " Tarball file to be unpacked." + echo "" + echo "Output arguments:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " Directory to write the contents of the .tar file to." + echo "" + echo "Other arguments:" + echo " -e, --exclude" + echo " type: string" + echo " example: docs/figures" + echo " Prevents any file or member whose name matches the shell wildcard" + echo " (pattern) from being extracted." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "untar move-detect-demultiplex-subworkflow" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --exclude) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --exclude. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --exclude=*) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'--exclude=*\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE=$(ViashRemoveFlags "$1") + shift 1 + ;; + -e) + [ -n "$VIASH_PAR_EXCLUDE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_EXCLUDE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_EXCLUDE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/demultiplex/io/untar:move-detect-demultiplex-subworkflow' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-untar-XXXXXX").sh +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\"'\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/usr/bin/env bash + +set -eo pipefail + +extra_args=() + +TMPDIR=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "\$TMPDIR" ]] && rm -r "\$TMPDIR" +} +trap clean_up EXIT + +# Check if tarball contains 1 top-level directory. If so, extract the contents of the +# directory to the output directory instead of the directory itself. +echo "Directory contents:" +tar -taf "\${par_input}" > "\$TMPDIR/tar_contents.txt" +cat "\$TMPDIR/tar_contents.txt" + +printf "Checking if tarball contains only a single top-level directory: " +if [[ \$(grep -o -E '^[./]*[^/]+/\$' "\$TMPDIR/tar_contents.txt" | uniq | wc -l) -eq 1 ]]; then + echo "It does." + echo "Extracting the contents of the top-level directory to the output directory instead of the directory itself." + # The directory can be both of the format './' (or ././) or just + # Adjust the number of stripped components accordingly by looking for './' at the beginning of the file. + starting_relative=\$(grep -oP -m 1 '^(./)*' "\$TMPDIR/tar_contents.txt" | tr -d '\\n' | wc -c) + n_strips=\$(( (\$starting_relative / 2)+1 )) + extra_args+=("--strip-components=\$n_strips") +else + echo "It does not." +fi + +if [ "\$par_exclude" != "" ]; then + echo "Exclusion of files with wildcard '\$par_exclude' requested." + extra_args+=("--exclude=\$par_exclude") +fi + +echo "Starting extraction of tarball '\$par_input' to output directory '\$par_output'." +mkdir -p "\$par_output" +echo "executing 'tar --no-same-owner --no-same-permissions --directory=\$par_output \${extra_args[@]} -xavf \$par_input'" +tar --no-same-owner --no-same-permissions --directory="\$par_output" \${extra_args[@]} -xavf "\$par_input" +VIASHMAIN +bash "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/nextflow/dataflow/combine_samples/.config.vsh.yaml b/target/nextflow/dataflow/combine_samples/.config.vsh.yaml new file mode 100644 index 0000000..4797b60 --- /dev/null +++ b/target/nextflow/dataflow/combine_samples/.config.vsh.yaml @@ -0,0 +1,199 @@ +name: "combine_samples" +namespace: "dataflow" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "string" + name: "--id" + description: "ID of the new event" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--forward_input" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--reverse_input" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--falco_dir" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output_forward" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--output_reverse" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--output_falco" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: true + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Combine fastq files from across samples into one event with a list of\ + \ fastq files per orientation." +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/dataflow/combine_samples/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/dataflow/combine_samples" + executable: "target/nextflow/dataflow/combine_samples/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/dataflow/combine_samples/main.nf b/target/nextflow/dataflow/combine_samples/main.nf new file mode 100644 index 0000000..d4e2a13 --- /dev/null +++ b/target/nextflow/dataflow/combine_samples/main.nf @@ -0,0 +1,3442 @@ +// combine_samples move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "combine_samples", + "namespace" : "dataflow", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "ID of the new event", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--forward_input", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--reverse_input", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--falco_dir", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output_forward", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_reverse", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_falco", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Combine fastq files from across samples into one event with a list of fastq files per orientation.", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/dataflow/combine_samples/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/dataflow/combine_samples", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + | map { id, state -> + def newEvent = [state.id, state + ["_meta": ["join_id": id]]] + newEvent + } + | groupTuple(by: 0, sort: "hash") + | map {run_id, states -> + // Gather the following state for all samples + def forward_fastqs = states.collect{it.forward_input}.flatten() + def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten() + def falco_dirs = states.collect{it.falco_dir} + + def resultState = [ + "output_forward": forward_fastqs, + "output_reverse": reverse_fastqs, + "output_falco": falco_dirs, + // The join ID is the same across all samples from the same run + "_meta": ["join_id": states[0]._meta.join_id] + ] + return [run_id, resultState] + } + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/dataflow/combine_samples/nextflow.config b/target/nextflow/dataflow/combine_samples/nextflow.config new file mode 100644 index 0000000..6d4d05a --- /dev/null +++ b/target/nextflow/dataflow/combine_samples/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'dataflow/combine_samples' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Combine fastq files from across samples into one event with a list of fastq files per orientation.' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/dataflow/combine_samples/nextflow_labels.config b/target/nextflow/dataflow/combine_samples/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/dataflow/combine_samples/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/dataflow/combine_samples/nextflow_schema.json b/target/nextflow/dataflow/combine_samples/nextflow_schema.json new file mode 100644 index 0000000..353c08b --- /dev/null +++ b/target/nextflow/dataflow/combine_samples/nextflow_schema.json @@ -0,0 +1,106 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "combine_samples", + "description": "Combine fastq files from across samples into one event with a list of fastq files per orientation.", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "id": { + "type": "string", + "description": "ID of the new event", + "help_text": "Type: `string`, multiple: `False`, required. " + }, + "forward_input": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "exists": true, + "description": "", + "help_text": "Type: `file`, multiple: `True`, required, direction: `input`. " + }, + "reverse_input": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, direction: `input`. " + }, + "falco_dir": { + "type": "string", + "format": "path", + "exists": true, + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "output_forward": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, required, default: `\"$id.$key.output_forward_*\"`, direction: `output`. ", + "default": "$id.$key.output_forward_*" + }, + "output_reverse": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, default: `\"$id.$key.output_reverse_*\"`, direction: `output`. ", + "default": "$id.$key.output_reverse_*" + }, + "output_falco": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, required, default: `\"$id.$key.output_falco_*\"`, direction: `output`. ", + "default": "$id.$key.output_falco_*" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml b/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml new file mode 100644 index 0000000..3faf262 --- /dev/null +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/.config.vsh.yaml @@ -0,0 +1,190 @@ +name: "gather_fastqs_and_validate" +namespace: "dataflow" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Directory containing .fastq files" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sample_sheet" + description: "Sample sheet" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--fastq_forward" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--fastq_reverse" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "From a directory containing fastq files, gather the files per sample\ + \ \nand validate according to the contents of the sample sheet.\n" +test_resources: +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_gather_and_validate" +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_undetermined_empty" +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_without_index" +- type: "file" + path: "test_data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/dataflow/gather_fastqs_and_validate/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/dataflow/gather_fastqs_and_validate" + executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf b/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf new file mode 100644 index 0000000..48f7a73 --- /dev/null +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/main.nf @@ -0,0 +1,3531 @@ +// gather_fastqs_and_validate move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "gather_fastqs_and_validate", + "namespace" : "dataflow", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Directory containing .fastq files", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--sample_sheet", + "description" : "Sample sheet", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--fastq_forward", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_reverse", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n", + "test_resources" : [ + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_gather_and_validate" + }, + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_undetermined_empty" + }, + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_without_index" + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/dataflow/gather_fastqs_and_validate/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/dataflow/gather_fastqs_and_validate", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// user-provided Nextflow code +import java.util.zip.GZIPInputStream +import java.nio.file.Files +import java.io.BufferedInputStream + +def is_empty(file_to_check){ + /* + Checks if a file has content + */ + if (file_to_check.size() == 0) { + return true + } + def input_stream = Files.newInputStream(file_to_check) + def gzInputStream + try { + gzInputStream = new GZIPInputStream(new BufferedInputStream(input_stream)) + } catch (java.io.EOFException ex) { + // This is not a gzipfile... + return false + } + def read_one_byte = gzInputStream.read() + return read_one_byte == -1 +} + +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + // Gather input files from BCL convert output folder + | flatMap { id, state -> + println "Processing sample sheet: $state.sample_sheet" + def sample_sheet = state.sample_sheet + def start_parsing = false + def sample_id_column_index = null + def undetermined_sample_name = "Undetermined" + def samples = [undetermined_sample_name] + def original_id = id + + // Parse sample sheet for sample IDs + println "Processing run information file ${sample_sheet}" + csv_lines = sample_sheet.splitCsv(header: false, sep: ',') + csv_lines.any { csv_items -> + if (csv_items.isEmpty() || csv_items[0].startsWith("#")) { + // skip empty or commented line + return + } + def possible_header = csv_items[0] + def header = possible_header.find(/\[(.*)\]/){fullmatch, header_name -> header_name} + if (header) { + if (start_parsing) { + // Stop parsing when encountering the next header + println "Encountered next header '[${start_parsing}]', stopping parsing." + return true + } + // [Data], [BCLConvert_Data] for illumina + // [Samples] or sometimes [SAMPLES] for Element Biosciences + if (header.toLowerCase() in ["data", "samples", "bclconvert_data"]) { + println "Found header [${header}], start parsing." + start_parsing = true + return + } + } + if (start_parsing) { + if ( sample_id_column_index == null) { + println "Looking for sample name column." + sample_id_column_index = csv_items.findIndexValues{it == "Sample_ID" || it == "SampleName"} + assert (!sample_id_column_index.isEmpty()): + "Could not find column 'Sample_ID' (Illumina) or 'SampleName' " + + "(Element Biosciences) in run information! Found: ${sample_id_column_index}" + assert sample_id_column_index.size() == 1, "Expected run information file to contain " + + "a column 'Sample_ID' or 'SampleName', not both. Found: ${sample_id_column_index}" + sample_id_column_index = sample_id_column_index[0] + println "Found sample names column '${csv_items[sample_id_column_index]}'." + return + } + def candidate_sample_id = csv_items[sample_id_column_index] + if (candidate_sample_id?.trim()) { // Don't add empty csv entries. + samples += csv_items[sample_id_column_index] + } + } + // This return is important! (If 'true' is returned, the parsing stops.) + return + } + assert start_parsing: + "Sample information file does not contain [Data], [Samples] or [BCLConvert_Data] header!" + assert samples.size() > 1: + "Sample information file does not seem to contain any information about the samples!" + println "Finished processing run information file, found samples: ${samples}." + println "Looking for fastq files in ${state.input}." + def allfastqs = state.input.listFiles().findAll{it.isFile() && it.name ==~ /^.+\.fastq.gz$/} + println "Found ${allfastqs.size()} fastq files, matching them to the following samples: ${samples}." + processed_samples = samples.collect { sample_id -> + def forward_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R1_(\d+)\.fastq\.gz$/ + def reverse_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R2_(\d+)\.fastq\.gz$/ + // Sort is needed here because multiple lanes (_L00*_) might be present and they need to be in the same order in both lists + def forward_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ forward_regex}.sort() + def reverse_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ reverse_regex}.sort() + assert forward_fastq && !forward_fastq.isEmpty(): "No forward fastq files were found for sample ${sample_id}. " + + "All fastq files in directory: ${allfastqs.collect{it.name}}" + assert (reverse_fastq.isEmpty() || (forward_fastq.size() == reverse_fastq.size())): + "Expected equal number of forward and reverse fastq files for sample ${sample_id}. " + + "Found forward: ${forward_fastq} and reverse: ${reverse_fastq}." + println "Found ${forward_fastq.size()} forward and ${reverse_fastq.size()} reverse " + + "fastq files for sample ${sample_id}" + + assert sample_id == undetermined_sample_name || (forward_fastq.every{!is_empty(it)} && reverse_fastq.every{!is_empty(it)}): + "A fastq file for sample '${sample_id}' appears to be empty!" + def fastqs_state = [ + "fastq_forward": forward_fastq, + "fastq_reverse": reverse_fastq, + "_meta": [ "join_id": original_id ], + ] + [sample_id, fastqs_state] + } + println "Finished processing sample sheet." + return processed_samples + } + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow.config b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow.config new file mode 100644 index 0000000..89a912f --- /dev/null +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'dataflow/gather_fastqs_and_validate' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_labels.config b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_schema.json b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_schema.json new file mode 100644 index 0000000..858164d --- /dev/null +++ b/target/nextflow/dataflow/gather_fastqs_and_validate/nextflow_schema.json @@ -0,0 +1,79 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "gather_fastqs_and_validate", + "description": "From a directory containing fastq files, gather the files per sample \nand validate according to the contents of the sample sheet.\n", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Directory containing .fastq files", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "sample_sheet": { + "type": "string", + "format": "path", + "exists": true, + "description": "Sample sheet", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "fastq_forward": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, required, default: `\"$id.$key.fastq_forward_*\"`, direction: `output`. ", + "default": "$id.$key.fastq_forward_*" + }, + "fastq_reverse": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `True`, default: `\"$id.$key.fastq_reverse_*\"`, direction: `output`. ", + "default": "$id.$key.fastq_reverse_*" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/demultiplex/.config.vsh.yaml b/target/nextflow/demultiplex/.config.vsh.yaml new file mode 100644 index 0000000..ba9a261 --- /dev/null +++ b/target/nextflow/demultiplex/.config.vsh.yaml @@ -0,0 +1,305 @@ +name: "demultiplex" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "string" + name: "--id" + description: "Unique identifier for the run" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "Directory containing raw sequencing data" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_information" + description: "CSV file containing sample information, which will be used as \n\ + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\n\ + or 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect\ + \ the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--demultiplexer" + description: "Demultiplexer to use, choice depends on the provider\nof the instrument\ + \ that was used to generate the data.\nWhen not using --sample_sheet, specifying\ + \ this argument is not\nrequired.\n" + info: null + required: false + choices: + - "bases2fastq" + - "bclconvert" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + description: "Directory to write fastq data to" + info: null + default: + - "$id/fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_falco" + description: "Directory to write falco output to" + info: null + default: + - "$id/qc/fastqc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--output_multiqc" + description: "Directory to write falco output to" + info: null + default: + - "$id/qc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_run_information" + info: null + default: + - "$id/run_information.csv" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--demultiplexer_logs" + info: null + default: + - "$id/demultiplexer_logs" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other arguments" + arguments: + - type: "boolean_true" + name: "--skip_copycomplete_check" + description: "Disable the check for the presence of a \"CopyComplete.txt\" file\ + \ in input\ndirectory in case of Illumina data.\n" + info: null + direction: "input" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Demultiplexing of raw sequencing data" +test_resources: +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_illumina" +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_bases2fastq" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "io/untar" + repository: + type: "local" +- name: "dataflow/gather_fastqs_and_validate" + repository: + type: "local" +- name: "io/interop_summary_to_csv" + repository: + type: "local" +- name: "dataflow/combine_samples" + repository: + type: "local" +- name: "bcl_convert" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "bases2fastq" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "falco" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "multiqc" + repository: + type: "vsh" + repo: "biobox" + tag: "v0.3.1" +- name: "detect_demultiplexer" + repository: + type: "local" +repositories: +- type: "vsh" + name: "bb" + repo: "biobox" + tag: "v0.3.1" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/demultiplex/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/demultiplex" + executable: "target/nextflow/demultiplex/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" + dependencies: + - "target/nextflow/io/untar" + - "target/nextflow/dataflow/gather_fastqs_and_validate" + - "target/nextflow/io/interop_summary_to_csv" + - "target/nextflow/dataflow/combine_samples" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco" + - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc" + - "target/nextflow/detect_demultiplexer" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/demultiplex/main.nf b/target/nextflow/demultiplex/main.nf new file mode 100644 index 0000000..b578b2b --- /dev/null +++ b/target/nextflow/demultiplex/main.nf @@ -0,0 +1,3742 @@ +// demultiplex move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "Unique identifier for the run", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Directory containing raw sequencing data", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_information", + "description" : "CSV file containing sample information, which will be used as \ninput for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\nor 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--demultiplexer", + "description" : "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "required" : false, + "choices" : [ + "bases2fastq", + "bclconvert" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Directory to write fastq data to", + "default" : [ + "$id/fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_falco", + "description" : "Directory to write falco output to", + "default" : [ + "$id/qc/fastqc" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_multiqc", + "description" : "Directory to write falco output to", + "default" : [ + "$id/qc/multiqc_report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_run_information", + "default" : [ + "$id/run_information.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--demultiplexer_logs", + "default" : [ + "$id/demultiplexer_logs" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Other arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--skip_copycomplete_check", + "description" : "Disable the check for the presence of a \\"CopyComplete.txt\\" file in input\ndirectory in case of Illumina data.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Demultiplexing of raw sequencing data", + "test_resources" : [ + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_illumina" + }, + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_bases2fastq" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "io/untar", + "repository" : { + "type" : "local" + } + }, + { + "name" : "dataflow/gather_fastqs_and_validate", + "repository" : { + "type" : "local" + } + }, + { + "name" : "io/interop_summary_to_csv", + "repository" : { + "type" : "local" + } + }, + { + "name" : "dataflow/combine_samples", + "repository" : { + "type" : "local" + } + }, + { + "name" : "bcl_convert", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "bases2fastq", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "falco", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "multiqc", + "repository" : { + "type" : "vsh", + "repo" : "biobox", + "tag" : "v0.3.1" + } + }, + { + "name" : "detect_demultiplexer", + "repository" : { + "type" : "local" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "bb", + "repo" : "biobox", + "tag" : "v0.3.1" + } + ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/demultiplex/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/demultiplex", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { untar } from "${meta.resources_dir}/../../nextflow/io/untar/main.nf" +include { gather_fastqs_and_validate } from "${meta.resources_dir}/../../nextflow/dataflow/gather_fastqs_and_validate/main.nf" +include { interop_summary_to_csv } from "${meta.resources_dir}/../../nextflow/io/interop_summary_to_csv/main.nf" +include { combine_samples } from "${meta.resources_dir}/../../nextflow/dataflow/combine_samples/main.nf" +include { bcl_convert } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/main.nf" +include { bases2fastq } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/main.nf" +include { falco } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/main.nf" +include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf" +include { detect_demultiplexer } from "${meta.resources_dir}/../../nextflow/detect_demultiplexer/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + samples_ch = input_ch + + // untar input if needed + | untar.run( + directives: [label: ["lowmem", "lowcpu"]], + runIf: {id, state -> + def inputStr = state.input.toString() + inputStr.endsWith(".tar.gz") || \ + inputStr.endsWith(".tar") || \ + inputStr.endsWith(".tgz") ? true : false + }, + fromState: [ + "input": "input", + ], + toState: { id, result, state -> + state + ["input": result.output] + }, + ) + + // detect demultiplexer + | detect_demultiplexer.run( + fromState: [ + "input": "input", + "run_information": "run_information", + "demultiplexer": "demultiplexer", + ], + toState: { id, result, state -> + state + [ + "demultiplexer": result.demultiplexer_output, + "run_information": result.run_information_output + ] + } + ) + + | interop_summary_to_csv.run( + runIf: {id, state -> state.demultiplexer in ["bclconvert"]}, + directives: [label: ["lowmem", "verylowcpu"]], + fromState: [ + "input": "input", + ], + toState: [ + "interop_run_summary": "output_run_summary", + "interop_index_summary": "output_index_summary", + ] + ) + // run bcl_convert + | bcl_convert.run( + runIf: {id, state -> state.demultiplexer in ["bclconvert"]}, + directives: [label: ["highmem", "midcpu"]], + fromState: { id, state -> + [ + bcl_input_directory: state.input, + sample_sheet: state.run_information, + output_directory: state.output, + reports: state.demultiplexer_logs, + logs: state.demultiplexer_logs, + ] + }, + toState: {id, result, state -> + def toAdd = [ + "output_demultiplexer" : result.output_directory, + "run_id": id, + "demultiplexer_logs": result.reports, + ] + def newState = state + toAdd + return newState + } + ) + // run bases2fastq + | bases2fastq.run( + runIf: {id, state -> state.demultiplexer in ["bases2fastq"]}, + directives: [label: ["highmem", "midcpu"]], + fromState: { id, state -> + [ + "analysis_directory": state.input, + "run_manifest": state.run_information, + "output_directory": state.output, + "report": state.demultiplexer_logs + "/report.html", + "logs": state.demultiplexer_logs, + ] + }, + args: [ + "no_projects": true, // Do not put output files in a subfolder for project + //"split_lanes": true, + "legacy_fastq": true, // Illumina style output names + "group_fastq": true, // No subdir per sample + ], + toState: {id, result, state -> + def toAdd = [ + "output_demultiplexer" : result.output_directory, + "run_id": id, + "demultiplexer_logs": result.logs, + + ] + def newState = state + toAdd + return newState + } + ) + | gather_fastqs_and_validate.run( + fromState: [ + "input": "output_demultiplexer", + "sample_sheet": "run_information", + ], + toState: [ + "fastq_forward": "fastq_forward", + "fastq_reverse": "fastq_reverse", + ], + ) + + output_ch = samples_ch + | falco.run( + directives: [label: ["verylowcpu", "lowmem"]], + fromState: {id, state -> + [ + "input": [state.fastq_forward, state.fastq_reverse], + "outdir": "$id/qc/falco", + "summary_filename": null, + "report_filename": null, + "data_filename": null, + ] + }, + toState: { id, result, state -> + state + [ "output_falco" : result.outdir ] + } + ) + + | combine_samples.run( + fromState: { id, state -> + [ + "id": state.run_id, + "forward_input": state.fastq_forward, + "reverse_input": state.fastq_reverse, + "falco_dir": state.output_falco, + ] + }, + toState: [ + "forward_fastqs": "output_forward", + "reverse_fastqs": "output_reverse", + "output_falco": "output_falco", + ] + ) + + | multiqc.run( + directives: [label: ["midcpu", "midmem"]], + fromState: {id, state -> + def new_state = [ + "input": state.output_falco, + "output_report": state.output_multiqc, + "cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}' + ] + if (state.demultiplexer == "bclconvert") { + new_state["input"] += [ + state.interop_run_summary.getParent(), + state.interop_index_summary.getParent() + ] + } + return new_state + }, + toState: { id, result, state -> + state + [ "output_multiqc" : result.output_report ] + } + ) + + | setState( + [ + //"_meta": "_meta", + "output": "output_demultiplexer", + "output_falco": "output_falco", + "output_multiqc": "output_multiqc", + "output_run_information": "run_information", + "demultiplexer_logs": "demultiplexer_logs" + ] + ) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/demultiplex/nextflow.config b/target/nextflow/demultiplex/nextflow.config new file mode 100644 index 0000000..89ab227 --- /dev/null +++ b/target/nextflow/demultiplex/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'demultiplex' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Demultiplexing of raw sequencing data' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/demultiplex/nextflow_labels.config b/target/nextflow/demultiplex/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/demultiplex/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/demultiplex/nextflow_schema.json b/target/nextflow/demultiplex/nextflow_schema.json new file mode 100644 index 0000000..edcb15f --- /dev/null +++ b/target/nextflow/demultiplex/nextflow_schema.json @@ -0,0 +1,128 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "demultiplex", + "description": "Demultiplexing of raw sequencing data", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the run", + "help_text": "Type: `string`, multiple: `False`. " + }, + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Directory containing raw sequencing data", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "run_information": { + "type": "string", + "format": "path", + "description": "CSV file containing sample information, which will be used as \ninput for the demultiplexer", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. " + }, + "demultiplexer": { + "type": "string", + "description": "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "help_text": "Type: `string`, multiple: `False`, choices: ``bases2fastq`, `bclconvert``. ", + "enum": [ + "bases2fastq", + "bclconvert" + ] + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "output": { + "type": "string", + "format": "path", + "description": "Directory to write fastq data to", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id/fastq\"`, direction: `output`. ", + "default": "$id/fastq" + }, + "output_falco": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "description": "Directory to write falco output to", + "help_text": "Type: `file`, multiple: `True`, default: `[\"$id/qc/fastqc\"]`, direction: `output`. ", + "default": [ + "$id/qc/fastqc" + ] + }, + "output_multiqc": { + "type": "string", + "format": "path", + "description": "Directory to write falco output to", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id/qc/multiqc_report.html\"`, direction: `output`. ", + "default": "$id/qc/multiqc_report.html" + }, + "output_run_information": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id/run_information.csv\"`, direction: `output`. ", + "default": "$id/run_information.csv" + }, + "demultiplexer_logs": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id/demultiplexer_logs\"`, direction: `output`. ", + "default": "$id/demultiplexer_logs" + } + } + }, + "other arguments": { + "title": "Other arguments", + "type": "object", + "description": "No description", + "properties": { + "skip_copycomplete_check": { + "type": "boolean", + "description": "Disable the check for the presence of a \"CopyComplete.txt\" file in input\ndirectory in case of Illumina data.\n", + "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ", + "default": false + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/other arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/detect_demultiplexer/.config.vsh.yaml b/target/nextflow/detect_demultiplexer/.config.vsh.yaml new file mode 100644 index 0000000..0378e98 --- /dev/null +++ b/target/nextflow/detect_demultiplexer/.config.vsh.yaml @@ -0,0 +1,196 @@ +name: "detect_demultiplexer" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Arguments" + arguments: + - type: "string" + name: "--id" + description: "Unique identifier for the run" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + description: "Directory containing raw sequencing data" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_information" + description: "CSV file containing sample information, which will be used as \n\ + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\n\ + or 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect\ + \ the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--demultiplexer" + description: "Demultiplexer to use, choice depends on the provider\nof the instrument\ + \ that was used to generate the data.\nWhen not using --sample_sheet, specifying\ + \ this argument is not\nrequired.\n" + info: null + required: false + choices: + - "bases2fastq" + - "bclconvert" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--demultiplexer_output" + description: "Output demultiplexer" + info: null + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_information_output" + description: "Output run information" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Detects the demultiplexer used to generate the fastq files and runs\ + \ the appropriate\ndemultiplexing workflow.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/detect_demultiplexer/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/detect_demultiplexer" + executable: "target/nextflow/detect_demultiplexer/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/detect_demultiplexer/main.nf b/target/nextflow/detect_demultiplexer/main.nf new file mode 100644 index 0000000..f15b42f --- /dev/null +++ b/target/nextflow/detect_demultiplexer/main.nf @@ -0,0 +1,3497 @@ +// detect_demultiplexer move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "detect_demultiplexer", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--id", + "description" : "Unique identifier for the run", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Directory containing raw sequencing data", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_information", + "description" : "CSV file containing sample information, which will be used as \ninput for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\nor 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--demultiplexer", + "description" : "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "required" : false, + "choices" : [ + "bases2fastq", + "bclconvert" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--demultiplexer_output", + "description" : "Output demultiplexer", + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_information_output", + "description" : "Output run information", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Detects the demultiplexer used to generate the fastq files and runs the appropriate\ndemultiplexing workflow.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/detect_demultiplexer/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/detect_demultiplexer", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch // Channel with [id, state] pairs + + main: + output_ch = input_ch + + // Gather input files from folder + | map {id, state -> + def newState = [:] + println("Provided run information: ${state.run_information} and demultiplexer: ${state.demultiplexer}") + // No auto-detection of run information file (it is user provided), + // in this case the demultiplexer should also be specified. + assert (!state.run_information || state.demultiplexer): "When setting --run_information, " + + "you must also provide a demultiplexer" + + if (!state.run_information) { + println("Run information was not specified, auto-detecting...") + // The supported_platforms hashmap must be a 1-on-1 mapping + // Also, it's keys must be present in the 'choices' field + // for the 'run_information' argument in the viash config. + def supported_platforms = [ + "bclconvert": "SampleSheet.csv", // Illumina + "bases2fastq": "RunManifest.csv" // Element Biosciences + ] + def found_sample_information = supported_platforms.collectEntries{demultiplexer, filename -> + println("Checking if ${filename} can be found in input folder ${state.input}.") + def resolved_filename = state.input.resolve(filename) + if (!resolved_filename.isFile()) { + resolved_filename = null + } + println("Result after looking for run information for ${demultiplexer}: ${resolved_filename}.") + [demultiplexer, resolved_filename] + } + def demultiplexer = null + def run_information = null + found_sample_information.each{demultiplexer_candidate, file_path -> + if (file_path) { + // At this point, a candicate run information file was found. + assert !run_information: "Autodetection of run information " + + "(SampleSheet, RunManifest) failed: " + + "multiple candidate files found in input folder. " + + "Please specify one using --run_information." + run_information = file_path + demultiplexer = demultiplexer_candidate + } + } + + // When autodetecting, the run information should have been found + assert run_information: "No run information file (SampleSheet, RunManifest) " + + "found in input directory." + + // When autodetecting, the demultiplexer must be set if the run information was found + assert demultiplexer: "State error: the demultiplexer should have been autodetected. " + + "Please report this as a bug." + + // When autodetecting, the found demultiplexer must match + // with the demultiplexer that the user has provided (in case it was provided). + if (state.demultiplexer) { + assert state.demultiplexer == demultiplexer, + "Requested to use demultiplexer ${state.demultiplexer} " + + "but demultiplexer based on the autodetected run information " + "file ${run_information} seems to indicate that the demultiplexer " + "should be ${demultiplexer}. Either avoid specifying the demultiplexer " + "or override the autodetection of the run information by providing " + "the file." + } + println("Using run information ${run_information} and demultiplexer ${demultiplexer}") + // At this point, the autodetected state can override the user provided state. + newState = newState + [ + "run_information": run_information, + "demultiplexer": demultiplexer, + ] + } // end auto-detection logic + + if (newState.demultiplexer in ["bclconvert"]) { + // Do not add InterOp to state because we generate the summary csv's in the next + // step based on the run dir, not the InterOp dir. + def interop_dir = state.input.resolve("InterOp") + assert interop_dir.isDirectory(): "Expected InterOp directory to be present." + + def copycomplete_file = state.input.resolve("CopyComplete.txt") + assert (copycomplete_file.isFile() || state.skip_copycomplete_check): + "'CopyComplete.txt' file was not found!" + } + + def resultState = state + newState + [id, resultState] + } + + | setState(["demultiplexer_output": "demultiplexer", + "run_information_output": "run_information"]) + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/detect_demultiplexer/nextflow.config b/target/nextflow/detect_demultiplexer/nextflow.config new file mode 100644 index 0000000..29d079d --- /dev/null +++ b/target/nextflow/detect_demultiplexer/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'detect_demultiplexer' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Detects the demultiplexer used to generate the fastq files and runs the appropriate\ndemultiplexing workflow.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/detect_demultiplexer/nextflow_labels.config b/target/nextflow/detect_demultiplexer/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/detect_demultiplexer/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/detect_demultiplexer/nextflow_schema.json b/target/nextflow/detect_demultiplexer/nextflow_schema.json new file mode 100644 index 0000000..4594fa0 --- /dev/null +++ b/target/nextflow/detect_demultiplexer/nextflow_schema.json @@ -0,0 +1,74 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "detect_demultiplexer", + "description": "Detects the demultiplexer used to generate the fastq files and runs the appropriate\ndemultiplexing workflow.\n", + "type": "object", + "$defs": { + "arguments": { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the run", + "help_text": "Type: `string`, multiple: `False`. " + }, + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Directory containing raw sequencing data", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "run_information": { + "type": "string", + "format": "path", + "description": "CSV file containing sample information, which will be used as \ninput for the demultiplexer", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. " + }, + "demultiplexer": { + "type": "string", + "description": "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "help_text": "Type: `string`, multiple: `False`, choices: ``bases2fastq`, `bclconvert``. ", + "enum": [ + "bases2fastq", + "bclconvert" + ] + }, + "demultiplexer_output": { + "type": "string", + "description": "Output demultiplexer", + "help_text": "Type: `string`, multiple: `False`. " + }, + "run_information_output": { + "type": "string", + "format": "path", + "description": "Output run information", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.run_information_output\"`, direction: `output`. ", + "default": "$id.$key.run_information_output" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml b/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml new file mode 100644 index 0000000..b6838ee --- /dev/null +++ b/target/nextflow/io/interop_summary_to_csv/.config.vsh.yaml @@ -0,0 +1,191 @@ +name: "interop_summary_to_csv" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Sequencing run folder (*not* InterOp folder)." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output_run_summary" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_index_summary" + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "iseq-DI" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "summary" + - "index-summary" + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "wget" + interactive: false + - type: "docker" + run: + - "wget https://github.com/Illumina/interop/releases/download/v1.3.1/interop-1.3.1-Linux-GNU.tar.gz\ + \ -O /tmp/interop.tar.gz && \\\ntar -C /tmp/ --no-same-owner --no-same-permissions\ + \ -xvf /tmp/interop.tar.gz && \\\nmv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary\ + \ /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/interop_summary_to_csv/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/io/interop_summary_to_csv" + executable: "target/nextflow/io/interop_summary_to_csv/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/io/interop_summary_to_csv/main.nf b/target/nextflow/io/interop_summary_to_csv/main.nf new file mode 100644 index 0000000..3e0b126 --- /dev/null +++ b/target/nextflow/io/interop_summary_to_csv/main.nf @@ -0,0 +1,3817 @@ +// interop_summary_to_csv move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "interop_summary_to_csv", + "namespace" : "io", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Sequencing run folder (*not* InterOp folder).", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output_run_summary", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_index_summary", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/testData/iseq-DI" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "summary", + "index-summary", + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:stable-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "move-detect-demultiplex-subworkflow", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "wget" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "wget https://github.com/Illumina/interop/releases/download/v1.3.1/interop-1.3.1-Linux-GNU.tar.gz -O /tmp/interop.tar.gz && \\\\\ntar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \\\\\nmv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/io/interop_summary_to_csv/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/io/interop_summary_to_csv", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_run_summary='&'#" ; else echo "# par_output_run_summary="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_INDEX_SUMMARY+x} ]; then echo "${VIASH_PAR_OUTPUT_INDEX_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_index_summary='&'#" ; else echo "# par_output_index_summary="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/usr/bin/env bash + +set -eo pipefail + +if [ ! -d "\\$par_input" ]; then + echo "Input directory does not exist or is not a directory" + exit 1 +fi +\\$(which summary) --csv=1 "\\$par_input" 1> "\\$par_output_run_summary" +\\$(which index-summary) --csv=1 "\\$par_input" 1> "\\$par_output_index_summary" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/demultiplex/io/interop_summary_to_csv", + "tag" : "move-detect-demultiplex-subworkflow" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/io/interop_summary_to_csv/nextflow.config b/target/nextflow/io/interop_summary_to_csv/nextflow.config new file mode 100644 index 0000000..432eace --- /dev/null +++ b/target/nextflow/io/interop_summary_to_csv/nextflow.config @@ -0,0 +1,124 @@ +manifest { + name = 'io/interop_summary_to_csv' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/io/interop_summary_to_csv/nextflow_labels.config b/target/nextflow/io/interop_summary_to_csv/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/io/interop_summary_to_csv/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/io/interop_summary_to_csv/nextflow_schema.json b/target/nextflow/io/interop_summary_to_csv/nextflow_schema.json new file mode 100644 index 0000000..7e7f555 --- /dev/null +++ b/target/nextflow/io/interop_summary_to_csv/nextflow_schema.json @@ -0,0 +1,66 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "interop_summary_to_csv", + "description": "No description", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Sequencing run folder (*not* InterOp folder).", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "output_run_summary": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output_run_summary\"`, direction: `output`. ", + "default": "$id.$key.output_run_summary" + }, + "output_index_summary": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output_index_summary\"`, direction: `output`. ", + "default": "$id.$key.output_index_summary" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/io/publish/.config.vsh.yaml b/target/nextflow/io/publish/.config.vsh.yaml new file mode 100644 index 0000000..a7bf917 --- /dev/null +++ b/target/nextflow/io/publish/.config.vsh.yaml @@ -0,0 +1,253 @@ +name: "publish" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Directory to write fastq data to" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_falco" + description: "Directory to write falco output to" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--input_multiqc" + description: "Location where to write the MultiQC report to." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_run_information" + description: "Location where to write the run information to." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_demultiplexer_logs" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + info: null + default: + - "fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_falco" + info: null + default: + - "qc/fastqc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_multiqc" + info: null + default: + - "qc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_run_information" + info: null + default: + - "run_information.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_demultiplexer_logs" + info: null + default: + - "demultiplexer_logs" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "code.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Publish the processed results of the run" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/publish/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/io/publish" + executable: "target/nextflow/io/publish/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/io/publish/main.nf b/target/nextflow/io/publish/main.nf new file mode 100644 index 0000000..1e5d64e --- /dev/null +++ b/target/nextflow/io/publish/main.nf @@ -0,0 +1,3917 @@ +// publish move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "publish", + "namespace" : "io", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Directory to write fastq data to", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_falco", + "description" : "Directory to write falco output to", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_multiqc", + "description" : "Location where to write the MultiQC report to.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_run_information", + "description" : "Location where to write the run information to.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_demultiplexer_logs", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "default" : [ + "fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_falco", + "default" : [ + "qc/fastqc" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_multiqc", + "default" : [ + "qc/multiqc_report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_run_information", + "default" : [ + "run_information.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_demultiplexer_logs", + "default" : [ + "demultiplexer_logs" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "./code.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Publish the processed results of the run", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:stable-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "move-detect-demultiplex-subworkflow", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/io/publish/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/io/publish", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FALCO+x} ]; then echo "${VIASH_PAR_INPUT_FALCO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_falco='&'#" ; else echo "# par_input_falco="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_INPUT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_multiqc='&'#" ; else echo "# par_input_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_INPUT_RUN_INFORMATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_run_information='&'#" ; else echo "# par_input_run_information="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_demultiplexer_logs='&'#" ; else echo "# par_input_demultiplexer_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then echo "${VIASH_PAR_OUTPUT_FALCO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_falco='&'#" ; else echo "# par_output_falco="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_OUTPUT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_multiqc='&'#" ; else echo "# par_output_multiqc="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_INFORMATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_run_information='&'#" ; else echo "# par_output_run_information="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_demultiplexer_logs='&'#" ; else echo "# par_output_demultiplexer_logs="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/bin/bash + +set -eo pipefail + +declare -A input_output_mapping=(["par_input"]="par_output" + ["par_input_multiqc"]="par_output_multiqc" + ["par_input_run_information"]="par_output_run_information" + ["par_input_demultiplexer_logs"]="par_output_demultiplexer_logs" + ) + +for input_argument_name in "\\${!input_output_mapping[@]}" +do + input_location="\\${!input_argument_name}" + output_argument_name="\\${input_output_mapping[\\$input_argument_name]}" + output_location="\\${!output_argument_name}" + echo "Publishing \\$input_location -> \\$output_location" + + echo "Creating directory if it does not exist." + mkdir -p \\$(dirname "\\$output_location") && echo "Containing directory \\$output_location created" + + echo "Copying files..." + cp -rL "\\$input_location" "\\$output_location" + + echo "Output files for \\$output_location:" + ls "\\$output_location" +done + +echo "Grouping output from \\$par_input_falco into \\$par_output_falco" +mkdir -p "\\$par_output_falco" +IFS=";" read -ra falco_inputs <<< \\$par_input_falco +for falco_dir in "\\${falco_inputs[@]}"; do + echo "Copying contents of \\$falco_dir" + find -H -D exec "\\$falco_dir" -type f -maxdepth 1 -exec cp -t "\\$par_output_falco" {} + +done +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/demultiplex/io/publish", + "tag" : "move-detect-demultiplex-subworkflow" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/io/publish/nextflow.config b/target/nextflow/io/publish/nextflow.config new file mode 100644 index 0000000..563fb8b --- /dev/null +++ b/target/nextflow/io/publish/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'io/publish' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Publish the processed results of the run' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/io/publish/nextflow_labels.config b/target/nextflow/io/publish/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/io/publish/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/io/publish/nextflow_schema.json b/target/nextflow/io/publish/nextflow_schema.json new file mode 100644 index 0000000..9b82928 --- /dev/null +++ b/target/nextflow/io/publish/nextflow_schema.json @@ -0,0 +1,118 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "publish", + "description": "Publish the processed results of the run", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Directory to write fastq data to", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "input_falco": { + "type": "array", + "items": { + "type": "string" + }, + "format": "path", + "exists": true, + "description": "Directory to write falco output to", + "help_text": "Type: `file`, multiple: `True`, required, direction: `input`. " + }, + "input_multiqc": { + "type": "string", + "format": "path", + "exists": true, + "description": "Location where to write the MultiQC report to.", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "input_run_information": { + "type": "string", + "format": "path", + "exists": true, + "description": "Location where to write the run information to.", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "input_demultiplexer_logs": { + "type": "string", + "format": "path", + "exists": true, + "description": "", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "output": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"fastq\"`, direction: `output`. ", + "default": "fastq" + }, + "output_falco": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"qc/fastqc\"`, direction: `output`. ", + "default": "qc/fastqc" + }, + "output_multiqc": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"qc/multiqc_report.html\"`, direction: `output`. ", + "default": "qc/multiqc_report.html" + }, + "output_run_information": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"run_information.csv\"`, direction: `output`. ", + "default": "run_information.csv" + }, + "output_demultiplexer_logs": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"demultiplexer_logs\"`, direction: `output`. ", + "default": "demultiplexer_logs" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/io/untar/.config.vsh.yaml b/target/nextflow/io/untar/.config.vsh.yaml new file mode 100644 index 0000000..450bf28 --- /dev/null +++ b/target/nextflow/io/untar/.config.vsh.yaml @@ -0,0 +1,190 @@ +name: "untar" +namespace: "io" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Tarball file to be unpacked." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output arguments" + arguments: + - type: "file" + name: "--output" + description: "Directory to write the contents of the .tar file to." + info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other arguments" + arguments: + - type: "string" + name: "--exclude" + alternatives: + - "-e" + description: "Prevents any file or member whose name matches the shell wildcard\ + \ (pattern) from being extracted." + info: null + example: + - "docs/figures" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Unpack a .tar file. When the contents of the .tar file is just a single\ + \ directory,\nput the contents of the directory into the output folder instead of\ + \ that directory.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "move-detect-demultiplex-subworkflow" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + interactive: false + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/io/untar/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/io/untar" + executable: "target/nextflow/io/untar/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/io/untar/main.nf b/target/nextflow/io/untar/main.nf new file mode 100644 index 0000000..cb9c8c7 --- /dev/null +++ b/target/nextflow/io/untar/main.nf @@ -0,0 +1,3846 @@ +// untar move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "untar", + "namespace" : "io", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Tarball file to be unpacked.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Directory to write the contents of the .tar file to.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Other arguments", + "arguments" : [ + { + "type" : "string", + "name" : "--exclude", + "alternatives" : [ + "-e" + ], + "description" : "Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted.", + "example" : [ + "docs/figures" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "debian:stable-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "move-detect-demultiplex-subworkflow", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps" + ], + "interactive" : false + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/io/untar/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/io/untar", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_EXCLUDE+x} ]; then echo "${VIASH_PAR_EXCLUDE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_exclude='&'#" ; else echo "# par_exclude="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END +#!/usr/bin/env bash + +set -eo pipefail + +extra_args=() + +TMPDIR=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXX") +function clean_up { + [[ -d "\\$TMPDIR" ]] && rm -r "\\$TMPDIR" +} +trap clean_up EXIT + +# Check if tarball contains 1 top-level directory. If so, extract the contents of the +# directory to the output directory instead of the directory itself. +echo "Directory contents:" +tar -taf "\\${par_input}" > "\\$TMPDIR/tar_contents.txt" +cat "\\$TMPDIR/tar_contents.txt" + +printf "Checking if tarball contains only a single top-level directory: " +if [[ \\$(grep -o -E '^[./]*[^/]+/\\$' "\\$TMPDIR/tar_contents.txt" | uniq | wc -l) -eq 1 ]]; then + echo "It does." + echo "Extracting the contents of the top-level directory to the output directory instead of the directory itself." + # The directory can be both of the format './' (or ././) or just + # Adjust the number of stripped components accordingly by looking for './' at the beginning of the file. + starting_relative=\\$(grep -oP -m 1 '^(./)*' "\\$TMPDIR/tar_contents.txt" | tr -d '\\\\n' | wc -c) + n_strips=\\$(( (\\$starting_relative / 2)+1 )) + extra_args+=("--strip-components=\\$n_strips") +else + echo "It does not." +fi + +if [ "\\$par_exclude" != "" ]; then + echo "Exclusion of files with wildcard '\\$par_exclude' requested." + extra_args+=("--exclude=\\$par_exclude") +fi + +echo "Starting extraction of tarball '\\$par_input' to output directory '\\$par_output'." +mkdir -p "\\$par_output" +echo "executing 'tar --no-same-owner --no-same-permissions --directory=\\$par_output \\${extra_args[@]} -xavf \\$par_input'" +tar --no-same-owner --no-same-permissions --directory="\\$par_output" \\${extra_args[@]} -xavf "\\$par_input" +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/demultiplex/io/untar", + "tag" : "move-detect-demultiplex-subworkflow" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/io/untar/nextflow.config b/target/nextflow/io/untar/nextflow.config new file mode 100644 index 0000000..57e4625 --- /dev/null +++ b/target/nextflow/io/untar/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'io/untar' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/io/untar/nextflow_labels.config b/target/nextflow/io/untar/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/io/untar/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/io/untar/nextflow_schema.json b/target/nextflow/io/untar/nextflow_schema.json new file mode 100644 index 0000000..d9c63bc --- /dev/null +++ b/target/nextflow/io/untar/nextflow_schema.json @@ -0,0 +1,74 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "untar", + "description": "Unpack a .tar file. When the contents of the .tar file is just a single directory,\nput the contents of the directory into the output folder instead of that directory.\n", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Tarball file to be unpacked.", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "output": { + "type": "string", + "format": "path", + "description": "Directory to write the contents of the .tar file to.", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output\"`, direction: `output`. ", + "default": "$id.$key.output" + } + } + }, + "other arguments": { + "title": "Other arguments", + "type": "object", + "description": "No description", + "properties": { + "exclude": { + "type": "string", + "description": "Prevents any file or member whose name matches the shell wildcard (pattern) from being extracted.", + "help_text": "Type: `string`, multiple: `False`, example: `\"docs/figures\"`. " + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/other arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/runner/.config.vsh.yaml b/target/nextflow/runner/.config.vsh.yaml new file mode 100644 index 0000000..385a4e7 --- /dev/null +++ b/target/nextflow/runner/.config.vsh.yaml @@ -0,0 +1,243 @@ +name: "runner" +version: "move-detect-demultiplex-subworkflow" +argument_groups: +- name: "Input arguments" + arguments: + - type: "file" + name: "--input" + description: "Base directory of the canonical form `s3://///`.\n\ + A tarball (tar.gz, .tgz, .tar) containing run information can be provided in\ + \ which\ncase the RunID is set to the name of the tarball without the extension.\n" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--run_information" + description: "CSV file containing sample information, which will be used as \n\ + input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\n\ + or 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect\ + \ the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--demultiplexer" + description: "Demultiplexer to use, choice depends on the provider\nof the instrument\ + \ that was used to generate the data.\nWhen not using --sample_sheet, specifying\ + \ this argument is not\nrequired.\n" + info: null + required: false + choices: + - "bases2fastq" + - "bclconvert" + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Annotation flags" + arguments: + - type: "boolean_true" + name: "--plain_output" + description: "Flag to indicate that the output should be stored directly under\ + \ $publish_dir rather than\nunder a subdirectory structure runID/_demultiplex_/.\n" + info: null + direction: "input" +- name: "Output arguments" + arguments: + - type: "file" + name: "--fastq_output" + info: null + default: + - "fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--falco_output" + info: null + default: + - "qc/fastqc" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--multiqc_output" + info: null + default: + - "qc/multiqc_report.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--demultiplexer_logs" + info: null + default: + - "demultiplexer_logs" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Other arguments" + arguments: + - type: "boolean_true" + name: "--skip_copycomplete_check" + description: "Disable the check for the presence of a \"CopyComplete.txt\" file\ + \ in input\ndirectory in case of Illumina data.\n" + info: null + direction: "input" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Runner for demultiplexing of raw sequencing data" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "demultiplex" + repository: + type: "local" +- name: "io/publish" + repository: + type: "local" +license: "MIT" +links: + repository: "https://github.com/viash-hub/demultiplex" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/runner/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/runner" + executable: "target/nextflow/runner/main.nf" + viash_version: "0.9.4" + git_commit: "84bb47bfb87cd0420f52ec97c15236311de4c94c" + git_remote: "https://github.com/viash-hub/demultiplex" + git_tag: "v0.1.1-31-g84bb47b" + dependencies: + - "target/nextflow/demultiplex" + - "target/nextflow/io/publish" +package_config: + name: "demultiplex" + version: "move-detect-demultiplex-subworkflow" + description: "Demultiplexing pipeline\n" + info: + test_resources: + - path: "gs://viash-hub-resources/demultiplex/v3" + dest: "testData" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag\ + \ := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + keywords: + - "bioinformatics" + - "sequence" + - "demultiplexing" + - "pipeline" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/demultiplex" + issue_tracker: "https://github.com/viash-hub/demultiplex/issues" diff --git a/target/nextflow/runner/main.nf b/target/nextflow/runner/main.nf new file mode 100644 index 0000000..c637ab2 --- /dev/null +++ b/target/nextflow/runner/main.nf @@ -0,0 +1,3560 @@ +// runner move-detect-demultiplex-subworkflow +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." + } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. + */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? + params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). + * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? "_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if (param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? + "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == "arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. + */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source /cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? ":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? + } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." + } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? + chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. + def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. + [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "runner", + "version" : "move-detect-demultiplex-subworkflow", + "argument_groups" : [ + { + "name" : "Input arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Base directory of the canonical form `s3://///`.\nA tarball (tar.gz, .tgz, .tar) containing run information can be provided in which\ncase the RunID is set to the name of the tarball without the extension.\n", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--run_information", + "description" : "CSV file containing sample information, which will be used as \ninput for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\nor 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--demultiplexer", + "description" : "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "required" : false, + "choices" : [ + "bases2fastq", + "bclconvert" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Annotation flags", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--plain_output", + "description" : "Flag to indicate that the output should be stored directly under $publish_dir rather than\nunder a subdirectory structure runID/_demultiplex_/.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Output arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--fastq_output", + "default" : [ + "fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--falco_output", + "default" : [ + "qc/fastqc" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--multiqc_output", + "default" : [ + "qc/multiqc_report.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--demultiplexer_logs", + "default" : [ + "demultiplexer_logs" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Other arguments", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--skip_copycomplete_check", + "description" : "Disable the check for the presence of a \\"CopyComplete.txt\\" file in input\ndirectory in case of Illumina data.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Runner for demultiplexing of raw sequencing data", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "demultiplex", + "repository" : { + "type" : "local" + } + }, + { + "name" : "io/publish", + "repository" : { + "type" : "local" + } + } + ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/runner/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/runner", + "viash_version" : "0.9.4", + "git_commit" : "84bb47bfb87cd0420f52ec97c15236311de4c94c", + "git_remote" : "https://github.com/viash-hub/demultiplex", + "git_tag" : "v0.1.1-31-g84bb47b" + }, + "package_config" : { + "name" : "demultiplex", + "version" : "move-detect-demultiplex-subworkflow", + "description" : "Demultiplexing pipeline\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/demultiplex/v3", + "dest" : "testData" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands += ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'move-detect-demultiplex-subworkflow'" + ], + "keywords" : [ + "bioinformatics", + "sequence", + "demultiplexing", + "pipeline" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/demultiplex", + "issue_tracker" : "https://github.com/viash-hub/demultiplex/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { demultiplex } from "${meta.resources_dir}/../../nextflow/demultiplex/main.nf" +include { publish } from "${meta.resources_dir}/../../nextflow/io/publish/main.nf" + +// inner workflow +// user-provided Nextflow code +def date = new Date().format('yyyyMMdd_hhmmss') + +def viash_config = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString() + "/_viash.yaml" +def version = get_version(viash_config) + +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + // Extract the ID from the input. + // If the input is a tarball, strip the suffix. + | map{ id, state -> + def id_with_suffix = state.input.getFileName().toString() + [ + id, + state + [ run_id: id_with_suffix - ~/\.(tar.gz|tgz|tar)$/ ] + ] + } + | demultiplex.run( + fromState: { id, state -> + def state_to_pass = [ + "input": state.input, + "run_information": state.run_information, + "demultiplexer": state.demultiplexer, + "skip_copycomplete_check": state.skip_copycomplete_check, + "output": "$id/fastq", + "output_falco": "$id/qc/fastqc", + "output_multiqc": "$id/qc/multiqc_report.html", + "demultiplexer_logs": "$id/demultiplexer_logs", + ] + if (state.run_information) { + state_to_pass += ["output_run_information": state.run_information.getName()] + } + state_to_pass + }, + toState: { id, result, state -> + state + result + }, + ) + | publish.run( + fromState: { id, state -> + println(state.plain_output) + def id1 = (state.plain_output) ? id : "${state.run_id}/${date}" + def id2 = (state.plain_output) ? id : "${id1}_demultiplex_${version}" + + def fastq_output_1 = (id2 == "run") ? state.fastq_output : "${id2}/" + state.fastq_output + def falco_output_1 = (id2 == "run") ? state.falco_output : "${id2}/" + state.falco_output + def multiqc_output_1 = (id2 == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output + def run_information_output_1 = (id2 == "run") ? "${state.output_run_information.getName()}" : "${id2}/${state.output_run_information.getName()}" + def demultiplexer_logs_output = (id2 == "run") ? state.demultiplexer_logs : "${id2}/${state.demultiplexer_logs.getName()}" + + if (id2 == "run") { + println("Publising to ${params.publish_dir}") + } else { + println("Publising to ${params.publish_dir}/${id2}") + } + + [ + input: state.output, + input_falco: state.output_falco, + input_multiqc: state.output_multiqc, + input_run_information: state.output_run_information, + input_demultiplexer_logs: state.demultiplexer_logs, + output: fastq_output_1, + output_falco: falco_output_1, + output_multiqc: multiqc_output_1, + output_run_information: run_information_output_1, + output_demultiplexer_logs: demultiplexer_logs_output, + ] + }, + toState: { id, result, state -> [:] }, + directives: [ + publishDir: [ + path: "${params.publish_dir}", + overwrite: false, + mode: "copy" + ] + ] + ) + + emit: + output_ch +} + +def get_version(inputFile) { + def yamlSlurper = new groovy.yaml.YamlSlurper() + def loaded_viash_config = yamlSlurper.parse(file(inputFile)) + def version = (loaded_viash_config.version) ? loaded_viash_config.version : "unknown_version" + println("Version to be used: ${version}") + return version +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/runner/nextflow.config b/target/nextflow/runner/nextflow.config new file mode 100644 index 0000000..1bc515d --- /dev/null +++ b/target/nextflow/runner/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'runner' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'move-detect-demultiplex-subworkflow' + description = 'Runner for demultiplexing of raw sequencing data' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/runner/nextflow_labels.config b/target/nextflow/runner/nextflow_labels.config new file mode 100644 index 0000000..e7ab7b8 --- /dev/null +++ b/target/nextflow/runner/nextflow_labels.config @@ -0,0 +1,98 @@ +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. + process { + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 12 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/runner/nextflow_schema.json b/target/nextflow/runner/nextflow_schema.json new file mode 100644 index 0000000..af79684 --- /dev/null +++ b/target/nextflow/runner/nextflow_schema.json @@ -0,0 +1,127 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "runner", + "description": "Runner for demultiplexing of raw sequencing data", + "type": "object", + "$defs": { + "input arguments": { + "title": "Input arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Base directory of the canonical form `s3://///`.\nA tarball (tar.gz, .tgz, .tar) containing run information can be provided in which\ncase the RunID is set to the name of the tarball without the extension.\n", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. " + }, + "run_information": { + "type": "string", + "format": "path", + "description": "CSV file containing sample information, which will be used as \ninput for the demultiplexer", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. " + }, + "demultiplexer": { + "type": "string", + "description": "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --sample_sheet, specifying this argument is not\nrequired.\n", + "help_text": "Type: `string`, multiple: `False`, choices: ``bases2fastq`, `bclconvert``. ", + "enum": [ + "bases2fastq", + "bclconvert" + ] + } + } + }, + "annotation flags": { + "title": "Annotation flags", + "type": "object", + "description": "No description", + "properties": { + "plain_output": { + "type": "boolean", + "description": "Flag to indicate that the output should be stored directly under $publish_dir rather than\nunder a subdirectory structure runID/_demultiplex_/.\n", + "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ", + "default": false + } + } + }, + "output arguments": { + "title": "Output arguments", + "type": "object", + "description": "No description", + "properties": { + "fastq_output": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"fastq\"`, direction: `output`. ", + "default": "fastq" + }, + "falco_output": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"qc/fastqc\"`, direction: `output`. ", + "default": "qc/fastqc" + }, + "multiqc_output": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"qc/multiqc_report.html\"`, direction: `output`. ", + "default": "qc/multiqc_report.html" + }, + "demultiplexer_logs": { + "type": "string", + "format": "path", + "description": "", + "help_text": "Type: `file`, multiple: `False`, default: `\"demultiplexer_logs\"`, direction: `output`. ", + "default": "demultiplexer_logs" + } + } + }, + "other arguments": { + "title": "Other arguments", + "type": "object", + "description": "No description", + "properties": { + "skip_copycomplete_check": { + "type": "boolean", + "description": "Disable the check for the presence of a \"CopyComplete.txt\" file in input\ndirectory in case of Illumina data.\n", + "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ", + "default": false + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. " + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input arguments" + }, + { + "$ref": "#/$defs/annotation flags" + }, + { + "$ref": "#/$defs/output arguments" + }, + { + "$ref": "#/$defs/other arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +}