diff --git a/CHANGELOG.md b/CHANGELOG.md index a055e800..3586defc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,82 @@ -# htrnaseq v0.x.y +# htrnaseq v0.8.0 ## Under the hood -- Bumped the version of Viash to v0.9.2 -- Moved the test resources to their new location +* Moved the test resources to their new location (PR #47) + +# htrnaseq v0.7.2 + +## Documentation + +* Update README (PR #54) + +# htrnaseq v0.7.1 + +## Bug fixes + +* Bump viash version to `0.9.4`. This adds support for nextflow versions starting from version 25.01 and +fixes an issue where an integer being passed to an argument with `type: double` resulted in an error (PR #51). + +* `reporting`: updated default colour mapping (PR #50). + +## Minor changes + +* `create_report`: bump bioconductor version to 3.21 in order to accommodate R version 4.5 (PR #52). + +# htrnaseq v0.7.0 + +## Breaking changes + +The `runner` and `htrnaseq` workflows now output FASTQ files corresponding to the barcodes per input ID (per sequencing run). +Previously, when multiple input folders or multiple input FASTQ files were provided +(for the `runner` and `htrnaseq` workflows respectively), the demultiplexed FASTQ files for these inputs were concatenated +and provided as output. For the `htrnaseq` workflow, reads can still be combined by using a newly added `sampleID` argument. +This means that two lists of FASTQ files can be provided for a single sample, and by assigning the same `sampleID`, +these reads will be joined. 
For example, with other arguments left out for brevity: + +```yaml +- id: sample1_run1 + input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq] + input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq] + sampleID: "sample_1" +- id: sample1_run2 + input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq] + input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq] + sampleID: "sample_1" +- id: sample_2 + input_r1: [sample_2_L001_1_R1.fastq, sample_2_L002_1_R1.fastq] + input_r2: [sample_2_L001_1_R2.fastq, sample_2_L002_1_R2.fastq] +``` + +For the runner, concatenation of data across samples is automatically inferred. Previously, multiple IDs (events) could be +provided which were processed in parallel. This is no longer possible, as providing multiple IDs will cause the matching +samples for these runs to be concatenated. + + +For example, the following old parameter yaml +```yaml +- id: run1 + input: ["run_folder_1/", run_folder_2/] +``` +should now be provided as: +```yaml +- id: run1 + input: "run_folder_1/" +- id: run2 + input: run_folder_2/ +``` + +## Minor changes + +* Updated viash to `0.9.2` (PR #49) + +# htrnaseq v0.6.0 + +## Breaking changes + +* `runner`: a subdirectory `data_processed` is now added to the output structure, in between + the experiment ID and the directory with the workflow date and version (PR #45). 
+ # htrnaseq v0.5.5 ## New functionality diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f5b784fa --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 OpenPipelines + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 20d55213..5a36d937 100644 --- a/README.md +++ b/README.md @@ -1,129 +1,207 @@ -# HT-RNAseq - A pipeline for processing high-throughput RNA-seq data + + +# HT-RNAseq + +[![ViashHub](https://img.shields.io/badge/ViashHub-htrnaseq-7a4baa.svg)](https://www.viash-hub.com/packages/htrnaseq) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Fhtrnaseq-blue.svg)](https://github.com/viash-hub/htrnaseq) +[![GitHub +License](https://img.shields.io/github/license/viash-hub/htrnaseq.svg)](https://github.com/viash-hub/htrnaseq/blob/main/LICENSE) +[![GitHub +Issues](https://img.shields.io/github/issues/viash-hub/htrnaseq.svg)](https://github.com/viash-hub/htrnaseq/issues) +[![Viash +version](https://img.shields.io/badge/Viash-v0.9.2-blue.svg)](https://viash.io) ## Introduction -__TODO__: Add a description of the pipeline here. -## Test data +This workflow is designed to process high-throughput RNA-seq data, where +every well of a microarray plate is a sample. A fasta file provided as +input defines the mapping between sample barcodes and wells. -As test data, we use [a DRUGseq dataset](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE176150) from the [NCBI Sequence Read Archive](https://www.ncbi.nlm.nih.gov/sra). +The workflow is built in a modular fashion, where most of the base +functionality is provided by components from +[`biobox`](https://www.viash-hub.com/packages/biobox/latest) +supplemented by custom base components and workflow components in this +package. -The original data has been (partly) subsampled to reduce the test runtime. We used [seqtk](https://github.com/lh3/seqtk) for this with a seed of 1, e.g.: +The full workflow is split in two major subworkflows that can be run +independently: -```bash +- **Well-demultiplexing:** Split the input (plate/pool level) fastq + files per well. +- **Mapping, counting and QC:** Run per-well mapping, counting and + generate QC reports. 
+ +Each of those can be started individually, or the full workflow can be +run in two ways: + +1. Run the [main + workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) + containing the main functionality. +2. Run the [(opinionated) + `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) + where a number of choices (input/output structure and location) have + been made. + +Input for the workflow has to be `fastq` files (zipped or not). For bcl +or other formats, please consider running +[demultiplex](https://www.viash-hub.com/packages/demultiplex) first. + +``` mermaid lang="mermaid" +flowchart TB + subgraph runner [runner] + direction TB + subgraph htrnaseq [HT-RNAseq] + direction LR + demultiplex[Well demultiplexing] + map + report + eset + end + end + + demultiplex --> map --> report --> eset + + class runner container + class htrnaseq container + class demultiplex container-inner + class map container-inner + class report container-inner + class eset container-inner + + class demultiplex node + class map node + class report node + class eset node +``` + +## Example usage + +### Test and example data + +If you want to explore this workflow, it’s possible to use the data we +use as test data: [a DRUGseq +dataset](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE176150) +from the [NCBI Sequence Read Archive](https://www.ncbi.nlm.nih.gov/sra). +For the unit and integration tests, this data has been (partly) +subsampled to reduce the test runtime. We used +[seqtk](https://github.com/lh3/seqtk) for this with a seed of 1, e.g.: + +``` bash seqtk sample -s1 orig/SRR14730302/VH02001614_S8_R1_001.fastq.gz 10000 > 10k/SRR14730302/VH02001614_S8_R1_001.fastq.gz ``` -The data is available at: `gs://viash-hub-test-data/htrnaseq/v1/`: +This data is available at: `gs://viash-hub-resources/htrnaseq/v1/`. 
-``` -❯ gcstree -f viash-hub-test-data/htrnaseq/v1/ -viash-hub-test-data -└── htrnaseq - └── v1 - ├── [ 48] 2-wells.fasta - ├── [465.3K] GSE176150_metadata.csv - ├── 100k - │ ├── SRR14730301 - │ │ ├── [8.5M] VH02001612_S9_R1_001.fastq - │ │ └── [14.9M] VH02001612_S9_R2_001.fastq - │ └── SRR14730302 - │ ├── [8.5M] VH02001614_S8_R1_001.fastq.gz - │ └── [14.9M] VH02001614_S8_R2_001.fastq.gz - ├── 10k - │ ├── SRR14730301 - │ │ ├── [845.4K] VH02001612_S9_R1_001.fastq - │ │ └── [1.5M] VH02001612_S9_R2_001.fastq - │ └── SRR14730302 - │ ├── [845.3K] VH02001614_S8_R1_001.fastq.gz - │ └── [1.5M] VH02001614_S8_R2_001.fastq.gz - └── orig - ├── [20.4G] SRR14730301 - │ └── [20.4G] SRR14730301 - ├── SRR14730301 - │ ├── [9.1G] VH02001612_S9_R1_001.fastq.gz - │ └── [22.0G] VH02001612_S9_R2_001.fastq.gz - ├── [16.9G] SRR14730302 - │ └── [16.9G] SRR14730302 - ├── SRR14730302 - │ ├── [7.6G] VH02001614_S8_R1_001.fastq.gz - │ └── [18.0G] VH02001614_S8_R2_001.fastq.gz - ├── [18.0G] SRR14730303 - │ └── [18.0G] SRR14730303 - ├── SRR14730303 - │ ├── [8.1G] VH02001618_S7_R1_001.fastq.gz - │ └── [19.2G] VH02001618_S7_R2_001.fastq.gz - ├── [16.5G] SRR14730304 - │ └── [16.5G] SRR14730304 - ├── SRR14730304 - │ ├── [7.5G] VH02001700_S6_R1_001.fastq.gz - │ └── [17.8G] VH02001700_S6_R2_001.fastq.gz - ├── [19.0G] SRR14730305 - │ └── [19.0G] SRR14730305 - ├── SRR14730305 - │ ├── [8.4G] VH02001702_S5_R1_001.fastq.gz - │ └── [20.6G] VH02001702_S5_R2_001.fastq.gz - ├── [14.6G] SRR14730306 - │ └── [14.6G] SRR14730306 - ├── SRR14730306 - │ ├── [6.6G] VH02001704_S4_R1_001.fastq.gz - │ └── [16.0G] VH02001704_S4_R2_001.fastq.gz - ├── [21.5G] SRR14730307 - │ └── [21.5G] SRR14730307 - ├── SRR14730307 - │ ├── [9.6G] VH02001708_S3_R1_001.fastq.gz - │ └── [23.2G] VH02001708_S3_R2_001.fastq.gz - ├── [20.7G] SRR14730308 - │ └── [20.7G] SRR14730308 - ├── SRR14730308 - │ ├── [9.3G] VH02001710_S2_R1_001.fastq.gz - │ └── [22.1G] VH02001710_S2_R2_001.fastq.gz - ├── [15.8G] SRR14730309 - │ └── [15.8G] SRR14730309 - └── 
SRR14730309 - ├── [7.2G] VH02001712_S1_R1_001.fastq.gz - └── [16.9G] VH02001712_S1_R2_001.fastq.gz +### Run from Viash Hub -18 directories, 37 files +Open [Viash Hub](https://www.viash-hub.com) and browse to the [htrnaseq +component](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq). +Press the ‘Launch’ button and follow the instructions. + +![](assets/htrnaseq-launch-small.png) + +We will start an example run loading just one input and using a barcodes +fasta file containing only 2 wells. + +In the first step, we add the `local` profile to the list of profiles in +order to limit the cpu and memory requirements of the workflow steps: + +![](assets/launch-parameters-1-small.png) + +In the next step, we provide the parameters as follows: + +- `input_r1`: + `gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R1_001.fastq` +- `input_r2`: + `gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R2_001.fastq` +- `genomeDir`: + `gs://viash-hub-test-data/htrnaseq/v1/genomeDir/subset/Homo_sapiens/v0.0.3/` +- `barcodesFasta`: + `gs://viash-hub-test-data/htrnaseq/v1/2-wells-with-ids.fasta` +- `annotation`: + `gs://viash-hub-test-data/htrnaseq/v1/genomeDir/gencode.v41.annotation.gtf.gz` + +Please note that both `input_r1` and `input_r2` can take multiple +values. This means that one has to press ENTER after pasting the input +path. + +![](assets/launch-parameters-2-small.png) + +Press the ‘Launch’ button at the end to get the instructions on how to +run the workflow from the CLI. + +### Run using NF-Tower / Seqera Cloud + +It’s possible to run the workflow directly from [Seqera +Cloud](https://cloud.seqera.io). The necessary [Nextflow schema +file](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) +has been built and provided with the workflows in order to use the +form-based input. 
However, Seqera Cloud can not deal with multiple-value +parameters when using the form-based input. Therefore, it’s better to +use Viash Hub also here: + +First, select the option to run the workflow using Seqera Cloud. You +will need to create an API token for your account. Once this token is +filled in in the corresponding field, you will get the option to select +a ‘Workspace’ and a ‘Compute environment’. + +![](assets/launch-parameters-3-small.png) + +Next, we need to fill in the parameters for the run. This is similar to +before: + +![](assets/launch-parameters-4-small.png) + +In the next screen, pressing the ‘Launch’ button will actually start the +workflow on Seqera Cloud. A message is shown when the submit was +successful. + +![](assets/launch-parameters-5-small.png) + +### Run from the CLI + +Running from the CLI directly without using Viash hub is possible. The +easiest is to just use the integrated help functionality, for instance +using the following: + +``` bash + nextflow run https://packages.viash-hub.com/vsh/htrnaseq.git \ + -revision v0.3.0 \ + -main-script target/nextflow/workflows/runner/main.nf \ + --help ``` +### (Optional) Resource usage tuning -The `orig` directory contains the original fastq files. The fastq files are available for 10k and 100k subsamples in the `10k` and `100k` directories, respectively. +Nextflow’s labels can be used to specify the amount of resources a +process can use. This workflow uses the following labels for CPU and +memory: -The `2-wells.fasta` file contains the barcodes for 2 wells. +- `verylowmem`, `lowmem`, `midmem`, `highmem` +- `verylowcpu`, `lowcpu`, `midcpu`, `highcpu` -## Test run +The defaults for these labels can be found at +`src/config/labels.config`. Nextflow checks that the specified resources +for a process do not exceed what is available on the machine and will +not start if it does. 
Create your own config file to tune the labels to +your needs, for example: -The pipeline can be run by creating a `params.yaml` file like this: + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } -```yaml -param_list: - - input_r1: "gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R1_001.fastq" - input_r2: "gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R2_001.fastq" - genomeDir: "gs://viash-hub-test-data/htrnaseq/v1/genomeDir/gencode.v41.star.sparse" - barcodesFasta: "gs://viash-hub-test-data/htrnaseq/v1/2-wells.fasta" - id: sample_one - - input_r1: "gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730302/VH02001614_S8_R1_001.fastq" - input_r2: "gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730302/VH02001614_S8_R2_001.fastq" - genomeDir: "gs://viash-hub-test-data/htrnaseq/v1/genomeDir/gencode.v41.star.sparse" - barcodesFasta: "gs://viash-hub-test-data/htrnaseq/v1/2-wells.fasta" - id: sample_two -``` + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } -and then: +When starting nextflow using the CLI, you can use `-c` to provide the +file to nextflow and overwrite the defaults. -```bash -viash ns build --setup cb -nextflow run . -main-script target/nextflow/workflows/htrnaseq/main.nf \ - -profile docker \ - -c target/nextflow/workflows/htrnaseq/nextflow.config \ - -params-file params.yaml \ - -resume \ - --publish_dir output -``` +## Contributions -Or, by running `src/workflows/htrnaseq/integration_test.sh`. +Developed in collaboration with Data Intuitive and Open Analytics. - -# Special Thanks - -Developed in collaboration with Data Intuitive and Open Analytics. 
\ No newline at end of file +Other contributions are welcome. diff --git a/README.qmd b/README.qmd new file mode 100644 index 00000000..ccb24557 --- /dev/null +++ b/README.qmd @@ -0,0 +1,149 @@ +--- +format: gfm +--- + +```{r setup, include=FALSE} +project <- yaml::read_yaml("_viash.yaml") +license <- paste0(project$links$repository, "/blob/main/LICENSE") +contributing <- paste0(project$links$repository, "/blob/main/CONTRIBUTING.md") +``` + +# HT-RNAseq + +[![ViashHub](https://img.shields.io/badge/ViashHub-`r project$name`-7a4baa.svg)](https://www.viash-hub.com/packages/`r project$name`) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r project$name`-blue.svg)](`r project$links$repository`) +[![GitHub License](https://img.shields.io/github/license/viash-hub/`r project$name`.svg)](`r license`) +[![GitHub Issues](https://img.shields.io/github/issues/viash-hub/`r project$name`.svg)](`r project$links$issue_tracker`) +[![Viash version](https://img.shields.io/badge/Viash-v`r gsub("-", "--", project$viash_version)`-blue.svg)](https://viash.io) + +## Introduction + +`r project$description` + + +```{mermaid lang='mermaid'} +flowchart TB + subgraph runner [runner] + direction TB + subgraph htrnaseq [HT-RNAseq] + direction LR + demultiplex[Well demultiplexing] + map + report + eset + end + end + + demultiplex --> map --> report --> eset + + class runner container + class htrnaseq container + class demultiplex container-inner + class map container-inner + class report container-inner + class eset container-inner + + class demultiplex node + class map node + class report node + class eset node +``` + + +## Example usage + +### Test and example data + +If you want to explore this workflow, it's possible to use the data we use as test data: [a DRUGseq dataset](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE176150) from the [NCBI Sequence Read Archive](https://www.ncbi.nlm.nih.gov/sra). 
For the unit and integration tests, this data has been (partly) subsampled to reduce the test runtime. We used [seqtk](https://github.com/lh3/seqtk) for this with a seed of 1, e.g.: + +```bash +seqtk sample -s1 orig/SRR14730302/VH02001614_S8_R1_001.fastq.gz 10000 > 10k/SRR14730302/VH02001614_S8_R1_001.fastq.gz +``` + +This data is available at: `gs://viash-hub-test-data/htrnaseq/v1/`. + +### Run from Viash Hub + +Open [Viash Hub](https://www.viash-hub.com) and browse to the [htrnaseq component](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq). Press the 'Launch' button and follow the instructions. + + +![](assets/htrnaseq-launch-small.png) + +We will start an example run loading just one input and using a barcodes fasta file containing only 2 wells. + +In the first step, we add the `local` profile to the list of profiles in order to limit the cpu and memory requirements of the workflow steps: + + +![](assets/launch-parameters-1-small.png) + +In the next step, we provide the parameters as follows: + +- `input_r1`: `gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R1_001.fastq` +- `input_r2`: `gs://viash-hub-test-data/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R2_001.fastq` +- `genomeDir`: `gs://viash-hub-test-data/htrnaseq/v1/genomeDir/subset/Homo_sapiens/v0.0.3/` +- `barcodesFasta`: `gs://viash-hub-test-data/htrnaseq/v1/2-wells-with-ids.fasta` +- `annotation`: `gs://viash-hub-test-data/htrnaseq/v1/genomeDir/gencode.v41.annotation.gtf.gz` + +Please note that both `input_r1` and `input_r2` can take multiple values. This means that one has to press ENTER after pasting the input path. + +![](assets/launch-parameters-2-small.png) + +Press the 'Launch' button at the end to get the instructions on how to run the workflow from the CLI. + + +### Run using NF-Tower / Seqera Cloud + +It's possible to run the workflow directly from [Seqera Cloud](https://cloud.seqera.io). 
The necessary [Nextflow schema file](https://nextflow-io.github.io/nf-schema/latest/nextflow_schema/nextflow_schema_specification/) has been built and provided with the workflows in order to use the form-based input. However, Seqera Cloud can not deal with multiple-value parameters when using the form-based input. Therefore, it's better to use Viash Hub also here: + +First, select the option to run the workflow using Seqera Cloud. You will need to create an API token for your account. Once this token is filled in in the corresponding field, you will get the option to select a 'Workspace' and a 'Compute environment'. + +![](assets/launch-parameters-3-small.png) + +Next, we need to fill in the parameters for the run. This is similar to before: + +![](assets/launch-parameters-4-small.png) + +In the next screen, pressing the 'Launch' button will actually start the workflow on Seqera Cloud. A message is shown when the submit was successful. + +![](assets/launch-parameters-5-small.png) + +### Run from the CLI + +Running from the CLI directly without using Viash hub is possible. The easiest is to just use the integrated help functionality, for instance using the following: + +```bash + nextflow run https://packages.viash-hub.com/vsh/htrnaseq.git \ + -revision v0.3.0 \ + -main-script target/nextflow/workflows/runner/main.nf \ + --help +``` + +### (Optional) Resource usage tuning + +Nextflow's labels can be used to specify the amount of resources a process can use. This workflow uses the following labels for CPU and memory: + +* `verylowmem`, `lowmem`, `midmem`, `highmem` +* `verylowcpu`, `lowcpu`, `midcpu`, `highcpu` + +The defaults for these labels can be found at `src/config/labels.config`. Nextflow checks that the specified resources for a process do not exceed what is available on the machine and will not start if it does. 
Create your own config file to tune the labels to your needs, for example: + +``` +// Resource labels +withLabel: verylowcpu { cpus = 2 } +withLabel: lowcpu { cpus = 8 } +withLabel: midcpu { cpus = 16 } +withLabel: highcpu { cpus = 32 } + +withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } +withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } +withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } +withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } +``` + +When starting nextflow using the CLI, you can use `-c` to provide the file to nextflow and overwrite the defaults. + +## Contributions + +Developed in collaboration with Data Intuitive and Open Analytics. + +Other contributions are welcome. diff --git a/_viash.yaml b/_viash.yaml index cb8b9807..f0e5c4dd 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,13 +1,47 @@ name: htrnaseq +summary: | + A workflow for high-throughput RNA-seq data analyses. description: | - High-throughput pipeline [WIP] + This workflow is designed to process high-throughput RNA-seq data, where every + well of a microarray plate is a sample. A fasta file provided as input + defines the mapping between sample barcodes and wells. + + The workflow is built in a modular fashion, where most of the base functionality + is provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest) + supplemented by custom base components and workflow components in this package. + + The full workflow is split in two major subworkflows that can be run independently: + + * **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well. + * **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports. + + Each of those can be started individually, or the full workflow can be run in two ways: + + 1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) + containing the main functionality. + 2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a + number of choices (input/output structure and location) have been made. + + Input for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running + [demultiplex](https://www.viash-hub.com/packages/demultiplex) first. + license: MIT -keywords: [bioinformatics, sequence, high-throughput, mapping, counting, pipeline] +keywords: + [ + bioinformatics, + sequencing, + high-throughput, + RNAseq, + mapping, + counting, + pipeline, + workflow, + ] links: issue_tracker: https://github.com/viash-hub/htrnaseq/issues repository: https://github.com/viash-hub/htrnaseq -viash_version: 0.9.2 +viash_version: 0.9.4 info: test_resources: diff --git a/assets/htrnaseq-launch-small.png b/assets/htrnaseq-launch-small.png new file mode 100644 index 00000000..c0872d6f Binary files /dev/null and b/assets/htrnaseq-launch-small.png differ diff --git a/assets/htrnaseq-launch.png b/assets/htrnaseq-launch.png new file mode 100644 index 00000000..bcbd2f25 Binary files /dev/null and b/assets/htrnaseq-launch.png differ diff --git a/assets/launch-parameters-1-small.png b/assets/launch-parameters-1-small.png new file mode 100644 index 00000000..6fa7ef94 Binary files /dev/null and b/assets/launch-parameters-1-small.png differ diff --git a/assets/launch-parameters-1.png b/assets/launch-parameters-1.png new file mode 100644 index 00000000..5a76b1fd Binary files /dev/null and b/assets/launch-parameters-1.png differ diff --git a/assets/launch-parameters-2-small.png b/assets/launch-parameters-2-small.png new file mode 100644 index 00000000..7c4a9eff Binary files /dev/null and b/assets/launch-parameters-2-small.png differ diff --git a/assets/launch-parameters-2.png b/assets/launch-parameters-2.png 
new file mode 100644 index 00000000..3d8dc5b5 Binary files /dev/null and b/assets/launch-parameters-2.png differ diff --git a/assets/launch-parameters-3-small.png b/assets/launch-parameters-3-small.png new file mode 100644 index 00000000..7e23730b Binary files /dev/null and b/assets/launch-parameters-3-small.png differ diff --git a/assets/launch-parameters-3.png b/assets/launch-parameters-3.png new file mode 100644 index 00000000..cdb842c5 Binary files /dev/null and b/assets/launch-parameters-3.png differ diff --git a/assets/launch-parameters-4-small.png b/assets/launch-parameters-4-small.png new file mode 100644 index 00000000..4f9b7703 Binary files /dev/null and b/assets/launch-parameters-4-small.png differ diff --git a/assets/launch-parameters-4.png b/assets/launch-parameters-4.png new file mode 100644 index 00000000..2289f7c6 Binary files /dev/null and b/assets/launch-parameters-4.png differ diff --git a/assets/launch-parameters-5-small.png b/assets/launch-parameters-5-small.png new file mode 100644 index 00000000..a6e2700c Binary files /dev/null and b/assets/launch-parameters-5-small.png differ diff --git a/assets/launch-parameters-5.png b/assets/launch-parameters-5.png new file mode 100644 index 00000000..abbc1f75 Binary files /dev/null and b/assets/launch-parameters-5.png differ diff --git a/src/io/publish_fastqs/code.sh b/src/io/publish_fastqs/code.sh index ad400de2..4ee3c658 100755 --- a/src/io/publish_fastqs/code.sh +++ b/src/io/publish_fastqs/code.sh @@ -8,13 +8,8 @@ mkdir -p "$par_output" && echo "$par_output created" echo echo "Copying files..." 
-IFS=";" read -ra input_r1 <<<$par_input_r1 -IFS=";" read -ra input_r2 <<<$par_input_r2 +IFS=";" read -ra input <<<$par_input -for i in "${input_r1[@]}"; do +for i in "${input[@]}"; do cp -rL "$i" "$par_output/" -done - -for i in "${input_r2[@]}"; do - cp -rL "$i" "$par_output/" -done +done \ No newline at end of file diff --git a/src/io/publish_fastqs/config.vsh.yaml b/src/io/publish_fastqs/config.vsh.yaml index d82d62b9..580a80e9 100644 --- a/src/io/publish_fastqs/config.vsh.yaml +++ b/src/io/publish_fastqs/config.vsh.yaml @@ -4,13 +4,8 @@ description: "Publish the fastq files per well" argument_groups: - name: Input arguments arguments: - - name: --input_r1 - description: Directory to write R1 fastq data to - type: file - multiple: true - required: true - - name: --input_r2 - description: Directory to write R2 fastq data to + - name: --input + description: FASTQ files to copy into the output directory type: file multiple: true required: true diff --git a/src/parallel_map/script.sh b/src/parallel_map/script.sh index 9cac89ff..a2c489b0 100755 --- a/src/parallel_map/script.sh +++ b/src/parallel_map/script.sh @@ -114,7 +114,8 @@ for barcode_index in "${!barcodes[@]}"; do fi done echo "Did not find FASTQ files files for well ${well_id}! "\ - "Make sure that the input files have the correct file name format." 
+ "Make sure that the input files have the correct file name format."\ + "Input files: ${input_r1[@]}" exit 1 done diff --git a/src/report/config.vsh.yaml b/src/report/config.vsh.yaml index cf0efbd7..c00ca403 100644 --- a/src/report/config.vsh.yaml +++ b/src/report/config.vsh.yaml @@ -40,6 +40,10 @@ engines: packages: - procps - pandoc + - type: r + script: + - install.packages("BiocManager") + - BiocManager::install(version = "3.21", type = "source", checkBuilt = TRUE) - type: r bioc: - Biobase diff --git a/src/report/plateLayouts.R b/src/report/plateLayouts.R index c0cd989d..a2909d4a 100644 --- a/src/report/plateLayouts.R +++ b/src/report/plateLayouts.R @@ -283,15 +283,31 @@ plateLayout <- function( if (is.null(colours)) { colours <- tryCatch({ - colorRamp2( + circlize::colorRamp2( breaks = breaks, colors = brewer.pal(length(breaks), "Purples") ) }, - error = function(cond) { - return(c("#9370DB", "white")) + error = function(cond){ + + message("Recomputed breaks for proper colour mapping") + + breakValues <- plateValues$values + breakValues[which(is.na(breakValues))] <- 0 + if (all(breakValues >= 0)) { + breaks <- computeBreaks(7, max(plateValues$values, na.rm = TRUE)) + } else { + breaks <- quantile(plateValues$values, probs = seq(0, 1, 0.125)) + } + + circlize::colorRamp2( + breaks = breaks, + colors = brewer.pal(length(breaks), "Purples") + ) + }) } + ht <- Heatmap( plateValues$values, column_title = mainTitle, column_title_side = "top", @@ -425,6 +441,7 @@ computeBreaks <- function(nBreaks, variable) { ) coefExp <- c(exp(coefSystem[1]), coefSystem[2]) breaks <- coefExp[1] * exp((1:(nBreaks - 1)) * coefExp[2]) + breaks <- unique(c(0, breaks)) } - return(c(0, breaks)) -} \ No newline at end of file + return(breaks) +} diff --git a/src/utils/concatRuns/config.vsh.yaml b/src/utils/concatRuns/config.vsh.yaml new file mode 100644 index 00000000..d7e30676 --- /dev/null +++ b/src/utils/concatRuns/config.vsh.yaml @@ -0,0 +1,43 @@ +name: concatRuns +namespace: utils 
+description: | + Concatenate well FASTQ files from different runs in order to increase sequencing depth. +arguments: + - name: "--input_r1" + type: file + required: true + multiple: true + - name: "--input_r2" + type: file + required: true + multiple: true + - name: "--sample_id" + type: string + required: true + - name: "--output_r1" + type: file + multiple: true + description: Path to read 1 fastq/fasta file + direction: output + - name: "--output_r2" + type: file + multiple: true + description: Path to read 2 fastq/fasta file + direction: output +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf +dependencies: + - name: concat_text + repository: cb +repositories: + - name: cb + type: vsh + repo: craftbox + tag: v0.1.0 +runners: + - type: nextflow + +engines: + - type: native diff --git a/src/utils/concatRuns/main.nf b/src/utils/concatRuns/main.nf new file mode 100644 index 00000000..7550ebee --- /dev/null +++ b/src/utils/concatRuns/main.nf @@ -0,0 +1,128 @@ +workflow run_wf { + + take: + input_ch + + main: + // Count the number of input events per sample + // Results from events with the same sample ID need to be concatenated. + event_counts_ch = input_ch + | map {id, state -> + def new_state = state + ["event_id": id] + def new_event = [state.sample_id, new_state] + return new_event + } + | groupTuple(by: 0) + | flatMap { id, states -> + def orig_event_ids = states.collect{it.event_id} + def new_events = orig_event_ids.collect{ orig_event_id -> + [orig_event_id, ["n_events": states.size()]] + } + return new_events + } + + + // The number of events per sample is passed to `groupTuple()` (via `groupKey`) + // so that it can emit the sample as soon as it is ready. This makes sure + // that the samples are processed asynchronously. 
+ output_ch = input_ch.join(event_counts_ch) + | flatMap {id, state_demultiplex, state_event_counts -> + assert state_demultiplex.input_r1.size() == state_demultiplex.input_r2.size(), + "Expected output from well demultiplexing to contain equal amount or forward and reverse FASTQ files." + def new_states = [state_demultiplex.input_r1, state_demultiplex.input_r2].transpose().collect{ fastq_files -> + def (r1_file, r2_file) = fastq_files + def regex = ~/^(\w+)_R[12]{1}_001\.fastq(\.gz)?$/ + def parsed_file_name = r1_file.name =~ regex + def parsed_file_name_r2 = r2_file.name =~ regex + def well_id = parsed_file_name[0][1] + def well_id_r2 = parsed_file_name_r2[0][1] + + assert (well_id.length() != 0) && (well_id == well_id_r2) + def new_state = state_demultiplex + [ + "input_r1": r1_file, + "input_r2": r2_file, + "event_id": id, + ] + def group_settings = groupKey("${state_demultiplex.sample_id}_${well_id}", state_event_counts.n_events) + return [group_settings, new_state] + + } + return new_states + } + | groupTuple(by: 0, sort: "hash", remainder: true) + | map {group_settings, sample_states -> + def input_r1 = sample_states.collect{it.input_r1}.flatten() + def input_r2 = sample_states.collect{it.input_r2}.flatten() + def event_ids = sample_states.collect{it.event_id} + def sample_id_list = sample_states.collect{it.sample_id}.unique() + assert sample_id_list.size() == 1 + def sample_id = sample_id_list[0] + assert input_r1.size() == input_r2.size() + + def new_state = [ + "input_r1": input_r1, + "input_r2": input_r2, + "event_id": event_ids, + "sample_id": sample_id, + ] + return [group_settings.target, new_state] + } + | concat_text.run( + directives: [label: ["lowmem", "lowcpu"]], + key: "concat_samples_r1", + runIf: {id, state -> state.input_r1.size() > 1}, + fromState: { id, state -> + def output_file_name = state.input_r1[0].name + [ + input: state.input_r1, + gzip_output: false, + output: output_file_name + ] + }, + toState: { id, result, state -> + def 
newState = state + [ input_r1: [ result.output ] ] + return newState + } + ) + | concat_text.run( + directives: [label: ["lowmem", "lowcpu"]], + key: "concat_samples_r2", + runIf: {id, state -> state.input_r2.size() > 1}, + fromState: { id, state -> + def output_file_name = state.input_r2[0].name + [ + input: state.input_r2, + gzip_output: false, + output: output_file_name + ] + }, + toState: { id, result, state -> + def newState = state + [ input_r2: [ result.output ] ] + return newState + } + ) + | map {id, state -> + def new_state = [state.sample_id, state] + return new_state + } + | groupTuple(by: 0, sort: 'hash') + | map {id, states -> + def new_state = [ + "input_r1": states.collect{it.input_r1}.flatten(), + "input_r2": states.collect{it.input_r2}.flatten(), + "_meta": ["join_id": states[0].event_id[0]] + ] + return [id, new_state] + } + | setState( + [ + "output_r1": "input_r1", + "output_r2": "input_r2", + "_meta": "_meta" + ] + ) + + emit: + output_ch + +} diff --git a/src/workflows/htrnaseq/config.vsh.yaml b/src/workflows/htrnaseq/config.vsh.yaml index cf99c45f..609779b0 100644 --- a/src/workflows/htrnaseq/config.vsh.yaml +++ b/src/workflows/htrnaseq/config.vsh.yaml @@ -8,14 +8,14 @@ argument_groups: arguments: - name: --input_r1 description: | - Forward reads in FASTQ format. Multiple files can be provided which will + Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will be demultiplexed separately before joining the results for each individual well. type: file required: true multiple: true - name: --input_r2 description: | - Reverse reads in FASTQ format. Multiple files can be provided which will + Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will be demultiplexed separately before joining the results for each individual well. 
type: file required: true @@ -35,22 +35,22 @@ argument_groups: - name: --annotation type: file required: true + - name: --sample_id + type: string + required: false + description: | + Sample ID for the provided input files. If not provided, the value of --id + will be used. Input files will always be demultiplexed separately, + but the FASTQs for wells with matching sample IDs will be concatenated before mapping. - name: Output arguments arguments: - - name: --fastq_output_r1 - description: List of demultiplexed fastq files + - name: "--fastq_output" + description: "Directory containing output fastq files" type: file - direction: output multiple: true required: true - default: "fastq/*_R1_001.fastq" - - name: --fastq_output_r2 - description: List of demultiplexed fastq files - type: file + default: "fastq/*" direction: output - multiple: true - required: true - default: "fastq/*_R2_001.fastq" - name: --star_output description: Output from mapping with STAR type: file @@ -120,6 +120,8 @@ dependencies: repository: local - name: report/create_report repository: local + - name: utils/concatRuns + repository: local repositories: - name: local type: local diff --git a/src/workflows/htrnaseq/main.nf b/src/workflows/htrnaseq/main.nf index 67d68420..25e663dd 100644 --- a/src/workflows/htrnaseq/main.nf +++ b/src/workflows/htrnaseq/main.nf @@ -1,33 +1,116 @@ workflow run_wf { take: - input_ch + raw_ch main: + input_ch = raw_ch + // Use the event ID as the default for the sample ID + | map {id, state -> + def sample_id = state.sample_id ?: id + def newState = state + ["sample_id": sample_id, "run_id": id] + return [id, newState] + } + // The featureData only has one requirement: the genome annotation. - // It can be generated straight away. + // It can be generated straight away. Most of the time, there is one shared + // annotation for all of the inputs and the fData should only be calculated once.
+ // The state is manipulated in such a way that there is one event created per unique + // input annotation file. In turn, the featureData file can be joined into the original input + // channel which allows it to be shared across events if required. f_data_ch = input_ch + | toSortedList() + | flatMap {ids_and_states -> + def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state -> + def (id, state) = id_and_state + def annotation_file = state.annotation + def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])] + return new_state + } + def file_names = annotation_files.keySet().collect{it.name} + assert (file_names.toSet().size() == file_names.size()), + "Please make sure that the annotation files have unique file names." + def new_states = annotation_files.collect{annotation_file, value -> + def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]] + return new_state + } + return new_states + } | create_fdata.run( directives: [label: ["lowmem", "lowcpu"]], fromState: [ "gtf": "annotation", "output": "f_data" ], - toState: {id, result, state -> ["f_data": result.output]} + toState: ["f_data": "output"] ) + | flatMap {_, state -> + def new_states = state.event_ids.collect{event_id -> + [event_id, ["f_data": state.f_data]] + } + return new_states + } // Perform mapping of each well. - mapping_ch = input_ch + demultiplex_ch = input_ch | well_demultiplex.run( fromState: [ "input_r1": "input_r1", "input_r2": "input_r2", "barcodesFasta": "barcodesFasta", ], - toState: [ - "input_r1": "output_r1", - "input_r2": "output_r2", - ] + toState: {id, result, state -> + def all_fastq = result.output_r1 + result.output_r2 + def output_dir = all_fastq.collect{it.parent}.unique() + assert output_dir.size() == 1, "Expected output from well demultiplexing to reside into one directory."
+ def new_state = state + [ + "input_r1": result.output_r1, + "input_r2": result.output_r2, + "fastq_output_directory": output_dir[0], + ] + return new_state + } ) + + fastq_output_directory_ch = demultiplex_ch + | map {id, state -> + def new_event = [state.sample_id, state] + return new_event + } + | groupTuple(by: 0, sort: "hash") + | map {id, states -> + def fastq_output_dirs = states.collect{it.fastq_output_directory} + def new_state = ["fastq_output_directory": fastq_output_dirs] + def new_event = [id, new_state] + return [id, new_state] + } + + + concat_samples_ch = demultiplex_ch.join(f_data_ch) + | map {id, demutliplex_state, f_data_state -> + def newState = demutliplex_state + ["f_data": f_data_state["f_data"]] + [id, newState] + } + | concatRuns.run( + fromState: [ + "input_r1": "input_r1", + "input_r2": "input_r2", + "sample_id": "sample_id", + ], + toState: {id, result, state -> + def state_overwite = [ + "input_r1": result.output_r1, + "input_r2": result.output_r2, + "_meta": ["join_id": state.run_id] + ] + return state + state_overwite + } + ) + + pool_ch = concat_samples_ch.join(fastq_output_directory_ch) + | map {id, demux_state, fastq_output_directory_state -> + def new_state = demux_state + fastq_output_directory_state + return [id, new_state] + } | parallel_map.run( directives: ["label": ["highmem", "lowcpu"]], fromState: {id, state -> @@ -44,9 +127,6 @@ workflow run_wf { "star_output": "output", ] ) - - // From the mapped wells, create statistics based on the BAM files. - pool_ch = mapping_ch // Split the events from 1 event per pool into events per well // and add extra metadata about the wells to the state. 
| well_metadata.run( @@ -167,7 +247,7 @@ workflow run_wf { ] ) - p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true) + eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true) | map {id, star_logs_state, pool_statistics_state -> def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool] return [id, newState] @@ -181,12 +261,6 @@ workflow run_wf { ], toState: ["p_data": "output"], ) - - eset_ch = p_data_ch.join(f_data_ch, remainder: true) - | map {id, p_data_state, f_data_state -> - def newState = p_data_state + ["f_data": f_data_state["f_data"]] - [id, newState] - } | create_eset.run( directives: [label: ["lowmem", "lowcpu"]], fromState: [ @@ -228,13 +302,14 @@ workflow run_wf { output_ch = eset_ch.join(report_channel) | map {id, state_eset, state_report -> - def new_state = state_eset + ["html_report": state_report.html_report] + def new_state = state_eset + [ + "html_report": state_report.html_report, + ] [id, new_state] } | setState([ - "star_output": "star_output", - "fastq_output_r1": "input_r1", - "fastq_output_r2": "input_r2", + "star_output": "star_output", + "fastq_output": "fastq_output_directory", "star_output": "star_output", "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool", "star_qc_metrics": "star_qc_metrics", @@ -242,6 +317,7 @@ workflow run_wf { "f_data": "f_data", "p_data": "p_data", "html_report": "html_report", + "_meta": "_meta", ]) diff --git a/src/workflows/runner/config.vsh.yaml b/src/workflows/runner/config.vsh.yaml index 0b11ea2d..46644d3a 100644 --- a/src/workflows/runner/config.vsh.yaml +++ b/src/workflows/runner/config.vsh.yaml @@ -6,7 +6,6 @@ argument_groups: arguments: - name: --input description: Base directory of the form `s3://Sequencing///` - multiple: true type: file required: true - name: --barcodesFasta diff --git a/src/workflows/runner/main.nf b/src/workflows/runner/main.nf index 6864ddd7..a98bea4f 100644 --- a/src/workflows/runner/main.nf +++ 
b/src/workflows/runner/main.nf @@ -8,19 +8,13 @@ workflow run_wf { input_ch main: - output_ch = input_ch - // Multiple runs can be provided, and the reads for these runs will - // be concatenated. Here, we gather the FASTQ files from each input directory first. - | flatMap {id, state -> - // Create an input event per input directory - def new_state = state.input.withIndex().collect{input_dir, id_index -> - def state_item = state + ["input": input_dir, "index": id_index, "run_id": id] - return ["${id}_${id_index}".toString(), state_item] - } - return new_state - } + htrnaseq_ch = input_ch // List the FASTQ files per input directory // Be careful: an event per lane is created! + | map {id, state -> + def new_state = state + ["run_id": id] + return [id, new_state] + } | listInputDir.run( fromState: [ "input": "input", @@ -38,13 +32,11 @@ workflow run_wf { // there might be multiple FASTQs for a single sample that correspond to the // lanes. So the fastq files must be gathered across lanes and input folders // in order to create an input lists for R1 and R2. - | map {id, state -> [state.sample_id, state]} - | groupTuple(by: 0, sort: { state1, state2 -> - if (state1.index == state2.index) { - return state1.lane <=> state2.lane - } - return state1.index <=> state2.index - }) + // The ID of the event here is important! It determines the name of the output + // folders for the FASTQ files and these folders are published as-is later. + // The folder where the FASTQ files are stored in should be named after the run ID. + | map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]} + | groupTuple(by: 0, sort: "hash") | map {id, states -> def new_r1 = states.collect{it.r1_output} def new_r2 = states.collect{it.r2_output} @@ -53,7 +45,7 @@ workflow run_wf { // TODO: this can be asserted. 
def new_state = states[0] + [ "r1": new_r1, - "r2": new_r2 + "r2": new_r2, ] return [id, new_state] } @@ -62,8 +54,7 @@ workflow run_wf { f_data: 'fData/$id.txt', p_data: 'pData/$id.txt', star_output: 'star_output/$id/*', - fastq_output_r1: 'fastq/*_R1_001.fastq', - fastq_output_r2: 'fastq/*_R1_001.fastq', + fastq_output: 'fastq/*', eset: 'esets/$id.rds', nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt', star_qc_metrics: 'starLogs/$id.txt', @@ -76,32 +67,32 @@ workflow run_wf { genomeDir: "genomeDir", annotation: "annotation", umi_length: "umi_length", + sample_id: "sample_id", ], toState: { id, result, state -> state + result } ) + // The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate) // but for publishing the results, this is not handy because we want to use the $id // variable as a pointer to the target data. // // So, we should combine everything together // - // project_id / experiment_id / date_workflow - + // project_id / experiment_id / "data_processed" / date_workflow + grouped_ch = htrnaseq_ch | toSortedList - | map{ vs -> def all_fastqs [ vs[0][1].run_id, // The original ID [ star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()), - fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1), - fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1), nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }), star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }), eset: reduce_paths(vs.collect{ it[1].eset }), f_data: reduce_paths(vs.collect{ it[1].f_data }), p_data: reduce_paths(vs.collect{ it[1].p_data }), + fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(), html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools plain_output: vs.collect{ it[1].plain_output }[0], project_id: vs.collect{ it[1].project_id }[0], @@ -110,12 +101,13 @@ workflow run_wf { ] } + results_publish_ch = grouped_ch 
| publish_results.run( fromState: { id, state -> def project = (state.plain_output) ? id : "${state.project_id}" def experiment = (state.plain_output) ? id : "${state.experiment_id}" def id0 = "${project}/${experiment}" - def id1 = (state.plain_output) ? id : "${id0}/${date}" + def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}" def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}" if (id == id2) { @@ -146,14 +138,24 @@ workflow run_wf { ] ) + fastq_publish_ch = grouped_ch + | flatMap{id, state -> + def new_states = state.fastq_output.collect{fastq_dir -> + def new_id = fastq_dir.name // The folder name corresponds to the run + def fastq_files = fastq_dir.listFiles() + def new_state = [ + "fastq_output": fastq_files + ] + return [new_id, new_state] + } + return new_states + } | publish_fastqs.run( fromState: { id, state -> def id0 = "${id}" def id1 = (state.plain_output) ? id : "${id0}/${date}" def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}" - println(state.plain_output) - if (id == id2) { println("Publising fastqs to ${params.fastq_publish_dir}") } else { @@ -161,8 +163,7 @@ workflow run_wf { } [ - input_r1: state.fastq_output_r1, - input_r2: state.fastq_output_r2, + input: state.fastq_output, output: "${id2}", ] }, @@ -177,7 +178,7 @@ workflow run_wf { ) emit: - output_ch + grouped_ch | map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] } } diff --git a/src/workflows/well_demultiplex/main.nf b/src/workflows/well_demultiplex/main.nf index f13332b9..9fff61c0 100644 --- a/src/workflows/well_demultiplex/main.nf +++ b/src/workflows/well_demultiplex/main.nf @@ -60,6 +60,8 @@ workflow run_wf { output: new_output, error_rate: 0.10, demultiplex_mode: "single", + output_r1: state.output_r1, + output_r2: state.output_r2, ] }, toState: { id, result, state -> diff --git a/target/executable/eset/create_eset/.config.vsh.yaml b/target/executable/eset/create_eset/.config.vsh.yaml index 560a4754..ad339552 100644 --- 
a/target/executable/eset/create_eset/.config.vsh.yaml +++ b/target/executable/eset/create_eset/.config.vsh.yaml @@ -202,18 +202,35 @@ build_info: engine: "docker|native" output: "target/executable/eset/create_eset" executable: "target/executable/eset/create_eset/create_eset" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. 
Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -225,11 +242,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/eset/create_eset/create_eset b/target/executable/eset/create_eset/create_eset index d57ec1f3..1ffe2c37 100755 --- a/target/executable/eset/create_eset/create_eset +++ b/target/executable/eset/create_eset/create_eset @@ -2,7 +2,7 @@ # create_eset update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -456,9 +456,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TR LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component eset create_eset" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/eset/create_fdata/.config.vsh.yaml b/target/executable/eset/create_fdata/.config.vsh.yaml index 13e1767e..a5ad5a8e 100644 --- a/target/executable/eset/create_fdata/.config.vsh.yaml +++ b/target/executable/eset/create_fdata/.config.vsh.yaml @@ -179,18 +179,35 @@ build_info: engine: "docker|native" output: "target/executable/eset/create_fdata" executable: "target/executable/eset/create_fdata/create_fdata" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -202,11 +219,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/eset/create_fdata/create_fdata b/target/executable/eset/create_fdata/create_fdata index 2c4c8b97..8bbcd6d3 100755 --- a/target/executable/eset/create_fdata/create_fdata +++ b/target/executable/eset/create_fdata/create_fdata @@ -2,7 +2,7 @@ # create_fdata update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component eset create_fdata" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/eset/create_pdata/.config.vsh.yaml b/target/executable/eset/create_pdata/.config.vsh.yaml index 0d256926..890a46fe 100644 --- a/target/executable/eset/create_pdata/.config.vsh.yaml +++ b/target/executable/eset/create_pdata/.config.vsh.yaml @@ -193,18 +193,35 @@ build_info: engine: "docker|native" output: "target/executable/eset/create_pdata" executable: "target/executable/eset/create_pdata/create_pdata" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -216,11 +233,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/eset/create_pdata/create_pdata b/target/executable/eset/create_pdata/create_pdata index cacd4201..872c64d0 100755 --- a/target/executable/eset/create_pdata/create_pdata +++ b/target/executable/eset/create_pdata/create_pdata @@ -2,7 +2,7 @@ # create_pdata update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component eset create_pdata" -LABEL org.opencontainers.image.created="2025-04-25T07:44:06Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:15Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml b/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml index 7633b87a..67445f44 100644 --- a/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml +++ b/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml @@ -151,18 +151,35 @@ build_info: engine: "docker|native" output: "target/executable/integration_test_components/htrnaseq/check_eset" executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -174,11 +191,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/integration_test_components/htrnaseq/check_eset/check_eset b/target/executable/integration_test_components/htrnaseq/check_eset/check_eset index 5652cfc0..32f6f007 100755 --- a/target/executable/integration_test_components/htrnaseq/check_eset/check_eset +++ b/target/executable/integration_test_components/htrnaseq/check_eset/check_eset @@ -2,7 +2,7 @@ # check_eset update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -455,9 +455,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TR LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/htrnaseq check_eset" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml b/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml index ca54ccaf..8b4ccf86 100644 --- a/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml +++ b/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml @@ -160,18 +160,35 @@ build_info: engine: "docker|native" output: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output" executable: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq 
data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -183,11 +200,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output b/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output index b6492520..33c54447 100755 --- a/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output +++ b/target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output @@ -2,7 +2,7 @@ # check_cutadapt_output update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -457,9 +457,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/well_demultiplexing check_cutadapt_output" -LABEL org.opencontainers.image.created="2025-04-25T07:44:04Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:15Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/io/publish_fastqs/.config.vsh.yaml b/target/executable/io/publish_fastqs/.config.vsh.yaml index 6831c968..4094c83c 100644 --- a/target/executable/io/publish_fastqs/.config.vsh.yaml +++ b/target/executable/io/publish_fastqs/.config.vsh.yaml @@ -5,18 +5,8 @@ argument_groups: - name: "Input arguments" arguments: - type: "file" - name: "--input_r1" - description: "Directory to write R1 fastq data to" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - - type: "file" - name: "--input_r2" - description: "Directory to write R2 fastq data to" + name: "--input" + description: "Directory to write fastq data to" info: null must_exist: true create_parent: true @@ -145,18 +135,35 @@ build_info: engine: "docker|native" output: "target/executable/io/publish_fastqs" executable: "target/executable/io/publish_fastqs/publish_fastqs" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline 
[WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -168,11 +175,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/io/publish_fastqs/publish_fastqs b/target/executable/io/publish_fastqs/publish_fastqs index 4469ea3c..0745a2ef 100755 --- a/target/executable/io/publish_fastqs/publish_fastqs +++ b/target/executable/io/publish_fastqs/publish_fastqs @@ -2,7 +2,7 @@ # publish_fastqs update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -450,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component io publish_fastqs" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER @@ -575,13 +575,9 @@ function ViashHelp { echo "Publish the fastq files per well" echo "" echo "Input arguments:" - echo " --input_r1" + echo " --input" echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Directory to write R1 fastq data to" - echo "" - echo " --input_r2" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " Directory to write R2 fastq data to" + echo " Directory to write fastq data to" echo "" echo "Output arguments:" echo " --output" @@ -638,37 +634,20 @@ while [[ $# -gt 0 ]]; do echo "publish_fastqs update-resources" exit ;; - --input_r1) - if [ -z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_PAR_INPUT_R1="$2" + --input) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT="$2" else - VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;""$2" + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r1. Use "--help" to get more information on the parameters. && exit 1 + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. 
&& exit 1 shift 2 ;; - --input_r1=*) - if [ -z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_PAR_INPUT_R1=$(ViashRemoveFlags "$1") + --input=*) + if [ -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") else - VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --input_r2) - if [ -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_PAR_INPUT_R2="$2" - else - VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input_r2=*) - if [ -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_PAR_INPUT_R2=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;"$(ViashRemoveFlags "$1") + VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") fi shift 1 ;; @@ -855,12 +834,8 @@ fi # check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT_R1+x} ]; then - ViashError '--input_r1' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_INPUT_R2+x} ]; then - ViashError '--input_r2' is a required argument. Use "--help" to get more information on the parameters. +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. exit 1 fi if [ -z ${VIASH_META_NAME+x} ]; then @@ -894,22 +869,10 @@ if [ -z ${VIASH_PAR_OUTPUT+x} ]; then fi # check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then +if [ ! -z "$VIASH_PAR_INPUT" ]; then IFS=';' set -f - for file in $VIASH_PAR_INPUT_R1; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT_R2; do + for file in $VIASH_PAR_INPUT; do unset IFS if [ ! -e "$file" ]; then ViashError "Input file '$file' does not exist." 
@@ -1010,27 +973,16 @@ fi if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # detect volumes from file arguments VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then - VIASH_TEST_INPUT_R1=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_TEST_INPUT=() IFS=';' - for var in $VIASH_PAR_INPUT_R1; do + for var in $VIASH_PAR_INPUT; do unset IFS VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT_R1+=( "$var" ) + VIASH_TEST_INPUT+=( "$var" ) done - VIASH_PAR_INPUT_R1=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R1[*]}") -fi -if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - VIASH_TEST_INPUT_R2=() - IFS=';' - for var in $VIASH_PAR_INPUT_R2; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT_R2+=( "$var" ) - done - VIASH_PAR_INPUT_R2=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R2[*]}") + VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") fi if [ ! -z "$VIASH_PAR_OUTPUT" ]; then VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) @@ -1106,8 +1058,7 @@ trap interrupt INT SIGINT cat > "\$tempscript" << 'VIASHMAIN' ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) $( if [ ! 
-z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) @@ -1139,14 +1090,9 @@ mkdir -p "\$par_output" && echo "\$par_output created" echo echo "Copying files..." -IFS=";" read -ra input_r1 <<<\$par_input_r1 -IFS=";" read -ra input_r2 <<<\$par_input_r2 +IFS=";" read -ra input <<<\$par_input -for i in "\${input_r1[@]}"; do - cp -rL "\$i" "\$par_output/" -done - -for i in "\${input_r2[@]}"; do +for i in "\${input[@]}"; do cp -rL "\$i" "\$par_output/" done VIASHMAIN @@ -1159,31 +1105,18 @@ VIASHEOF if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # strip viash automount from file paths - if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then - unset VIASH_TEST_INPUT_R1 + if [ ! -z "$VIASH_PAR_INPUT" ]; then + unset VIASH_TEST_INPUT IFS=';' - for var in $VIASH_PAR_INPUT_R1; do + for var in $VIASH_PAR_INPUT; do unset IFS - if [ -z "$VIASH_TEST_INPUT_R1" ]; then - VIASH_TEST_INPUT_R1="$(ViashDockerStripAutomount "$var")" + if [ -z "$VIASH_TEST_INPUT" ]; then + VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" else - VIASH_TEST_INPUT_R1="$VIASH_TEST_INPUT_R1;""$(ViashDockerStripAutomount "$var")" + VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")" fi done - VIASH_PAR_INPUT_R1="$VIASH_TEST_INPUT_R1" - fi - if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then - unset VIASH_TEST_INPUT_R2 - IFS=';' - for var in $VIASH_PAR_INPUT_R2; do - unset IFS - if [ -z "$VIASH_TEST_INPUT_R2" ]; then - VIASH_TEST_INPUT_R2="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT_R2="$VIASH_TEST_INPUT_R2;""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT_R2="$VIASH_TEST_INPUT_R2" + VIASH_PAR_INPUT="$VIASH_TEST_INPUT" fi if [ ! 
-z "$VIASH_PAR_OUTPUT" ]; then VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") diff --git a/target/executable/io/publish_results/.config.vsh.yaml b/target/executable/io/publish_results/.config.vsh.yaml index 5dc81469..a319d1a1 100644 --- a/target/executable/io/publish_results/.config.vsh.yaml +++ b/target/executable/io/publish_results/.config.vsh.yaml @@ -189,18 +189,35 @@ build_info: engine: "docker|native" output: "target/executable/io/publish_results" executable: "target/executable/io/publish_results/publish_results" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -212,11 +229,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/io/publish_results/publish_results b/target/executable/io/publish_results/publish_results index 8cb5a20b..fb71b351 100755 --- a/target/executable/io/publish_results/publish_results +++ b/target/executable/io/publish_results/publish_results @@ -2,7 +2,7 @@ # publish_results update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -450,9 +450,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component io publish_results" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/parallel_map/.config.vsh.yaml b/target/executable/parallel_map/.config.vsh.yaml index 98dd7b5c..fab32c75 100644 --- a/target/executable/parallel_map/.config.vsh.yaml +++ b/target/executable/parallel_map/.config.vsh.yaml @@ -281,18 +281,35 @@ build_info: engine: "docker|native" output: "target/executable/parallel_map" executable: "target/executable/parallel_map/parallel_map" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -304,11 +321,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/parallel_map/parallel_map b/target/executable/parallel_map/parallel_map index bd9f1d0a..4441b44c 100755 --- a/target/executable/parallel_map/parallel_map +++ b/target/executable/parallel_map/parallel_map @@ -2,7 +2,7 @@ # parallel_map update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. # @@ -461,9 +461,9 @@ ENV STAR_BINARY=STAR COPY STAR /usr/local/bin/$STAR_BINARY LABEL org.opencontainers.image.authors="Dries Schaumont, Toni Verbeiren" LABEL org.opencontainers.image.description="Companion container for running component parallel_map" -LABEL org.opencontainers.image.created="2025-04-25T07:44:06Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:15Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER @@ -1447,7 +1447,8 @@ for barcode_index in "\${!barcodes[@]}"; do fi done echo "Did not find FASTQ files files for well \${well_id}! 
"\\ - "Make sure that the input files have the correct file name format." + "Make sure that the input files have the correct file name format."\\ + "Input files: \${input_r1[@]}" exit 1 done diff --git a/target/executable/report/create_report/.config.vsh.yaml b/target/executable/report/create_report/.config.vsh.yaml index 70225623..21db361a 100644 --- a/target/executable/report/create_report/.config.vsh.yaml +++ b/target/executable/report/create_report/.config.vsh.yaml @@ -164,6 +164,12 @@ engines: - "procps" - "pandoc" interactive: false + - type: "r" + script: + - "install.packages(\"BiocManager\")" + - "BiocManager::install(version = \"3.21\", type = \"source\", checkBuilt = TRUE)" + bioc_force_install: false + warnings_as_errors: true - type: "r" cran: - "ggplot2" @@ -205,18 +211,35 @@ build_info: engine: "docker|native" output: "target/executable/report/create_report" executable: "target/executable/report/create_report/create_report" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -228,11 +251,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/report/create_report/create_report b/target/executable/report/create_report/create_report index 1e413a1f..d9541a6a 100755 --- a/target/executable/report/create_report/create_report +++ b/target/executable/report/create_report/create_report @@ -2,7 +2,7 @@ # create_report update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -453,6 +453,9 @@ RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y procps pandoc && \ rm -rf /var/lib/apt/lists/* +RUN Rscript -e 'options(warn = 2); install.packages("BiocManager")' && \ + Rscript -e 'options(warn = 2); BiocManager::install(version = "3.21", type = "source", checkBuilt = TRUE)' + RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ Rscript -e 'options(warn = 2); if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \ @@ -462,9 +465,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TR LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component report create_report" -LABEL org.opencontainers.image.created="2025-04-25T07:44:06Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:15Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/report/create_report/plateLayouts.R b/target/executable/report/create_report/plateLayouts.R index c0cd989d..a2909d4a 100755 --- a/target/executable/report/create_report/plateLayouts.R +++ b/target/executable/report/create_report/plateLayouts.R @@ -283,15 +283,31 @@ plateLayout <- function( if (is.null(colours)) { colours <- tryCatch({ - colorRamp2( + circlize::colorRamp2( breaks = breaks, colors = brewer.pal(length(breaks), "Purples") ) }, - error = function(cond) { - return(c("#9370DB", "white")) + error = function(cond){ + + 
message("Recomputed breaks for proper colour mapping") + + breakValues <- plateValues$values + breakValues[which(is.na(breakValues))] <- 0 + if (all(breakValues >= 0)) { + breaks <- computeBreaks(7, max(plateValues$values, na.rm = TRUE)) + } else { + breaks <- quantile(plateValues$values, probs = seq(0, 1, 0.125)) + } + + circlize::colorRamp2( + breaks = breaks, + colors = brewer.pal(length(breaks), "Purples") + ) + }) } + ht <- Heatmap( plateValues$values, column_title = mainTitle, column_title_side = "top", @@ -425,6 +441,7 @@ computeBreaks <- function(nBreaks, variable) { ) coefExp <- c(exp(coefSystem[1]), coefSystem[2]) breaks <- coefExp[1] * exp((1:(nBreaks - 1)) * coefExp[2]) + breaks <- unique(c(0, breaks)) } - return(c(0, breaks)) -} \ No newline at end of file + return(breaks) +} diff --git a/target/executable/stats/combine_star_logs/.config.vsh.yaml b/target/executable/stats/combine_star_logs/.config.vsh.yaml index d4188e4e..26751d80 100644 --- a/target/executable/stats/combine_star_logs/.config.vsh.yaml +++ b/target/executable/stats/combine_star_logs/.config.vsh.yaml @@ -200,18 +200,35 @@ build_info: engine: "docker|native" output: "target/executable/stats/combine_star_logs" executable: "target/executable/stats/combine_star_logs/combine_star_logs" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -223,11 +240,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/stats/combine_star_logs/combine_star_logs b/target/executable/stats/combine_star_logs/combine_star_logs index cb341cef..2fed05a4 100755 --- a/target/executable/stats/combine_star_logs/combine_star_logs +++ b/target/executable/stats/combine_star_logs/combine_star_logs @@ -2,7 +2,7 @@ # combine_star_logs update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -457,9 +457,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont" LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs" -LABEL org.opencontainers.image.created="2025-04-25T07:44:04Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:13Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/stats/generate_pool_statistics/.config.vsh.yaml b/target/executable/stats/generate_pool_statistics/.config.vsh.yaml index d2873907..cd9227ef 100644 --- a/target/executable/stats/generate_pool_statistics/.config.vsh.yaml +++ b/target/executable/stats/generate_pool_statistics/.config.vsh.yaml @@ -184,18 +184,35 @@ build_info: engine: "docker|native" output: "target/executable/stats/generate_pool_statistics" executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -207,11 +224,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/stats/generate_pool_statistics/generate_pool_statistics b/target/executable/stats/generate_pool_statistics/generate_pool_statistics index 731f172f..4414ec2b 100755 --- a/target/executable/stats/generate_pool_statistics/generate_pool_statistics +++ b/target/executable/stats/generate_pool_statistics/generate_pool_statistics @@ -2,7 +2,7 @@ # generate_pool_statistics update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/executable/stats/generate_well_statistics/.config.vsh.yaml b/target/executable/stats/generate_well_statistics/.config.vsh.yaml index dcc78bf4..a54eb9e6 100644 --- a/target/executable/stats/generate_well_statistics/.config.vsh.yaml +++ b/target/executable/stats/generate_well_statistics/.config.vsh.yaml @@ -266,18 +266,35 @@ build_info: engine: "docker|native" output: "target/executable/stats/generate_well_statistics" executable: "target/executable/stats/generate_well_statistics/generate_well_statistics" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -289,11 +306,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/executable/stats/generate_well_statistics/generate_well_statistics b/target/executable/stats/generate_well_statistics/generate_well_statistics index 1b5ecf78..9de477d8 100755 --- a/target/executable/stats/generate_well_statistics/generate_well_statistics +++ b/target/executable/stats/generate_well_statistics/generate_well_statistics @@ -2,7 +2,7 @@ # generate_well_statistics update-resources # -# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -461,9 +461,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke" LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics" -LABEL org.opencontainers.image.created="2025-04-25T07:44:05Z" +LABEL org.opencontainers.image.created="2025-05-08T12:58:14Z" LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq" -LABEL org.opencontainers.image.revision="d157606b49b157cd2955acf9124f9043fbd0ca5a" +LABEL org.opencontainers.image.revision="f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" LABEL org.opencontainers.image.version="update-resources" VIASHDOCKER diff --git a/target/nextflow/eset/create_eset/.config.vsh.yaml b/target/nextflow/eset/create_eset/.config.vsh.yaml index 92c7c33a..41319ee1 100644 --- a/target/nextflow/eset/create_eset/.config.vsh.yaml +++ b/target/nextflow/eset/create_eset/.config.vsh.yaml @@ -202,18 +202,35 @@ build_info: engine: "docker|native" output: "target/nextflow/eset/create_eset" executable: "target/nextflow/eset/create_eset/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -225,11 +242,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/eset/create_eset/main.nf b/target/nextflow/eset/create_eset/main.nf index deb8ef70..d37faa4b 100644 --- a/target/nextflow/eset/create_eset/main.nf +++ b/target/nextflow/eset/create_eset/main.nf @@ -1,6 +1,6 @@ // create_eset update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3308,14 +3303,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/eset/create_eset", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3324,7 +3321,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3335,11 +3332,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3358,7 +3357,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.R" cat > "$tempscript" << VIASHMAIN library(Biobase) library(data.table) @@ -4156,7 +4155,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4170,6 +4169,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using 
the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/eset/create_eset/nextflow_schema.json b/target/nextflow/eset/create_eset/nextflow_schema.json index 1e6f217e..0dc57228 100644 --- a/target/nextflow/eset/create_eset/nextflow_schema.json +++ b/target/nextflow/eset/create_eset/nextflow_schema.json @@ -57,10 +57,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.rds`. ", - "help_text": "Type: `file`, required, default: `$id.$key.output.rds`. " + "description": "Type: `file`, required, default: `eset.$id.rds`. ", + "help_text": "Type: `file`, required, default: `eset.$id.rds`. 
" , - "default":"$id.$key.output.rds" + "default":"eset.$id.rds" } diff --git a/target/nextflow/eset/create_fdata/.config.vsh.yaml b/target/nextflow/eset/create_fdata/.config.vsh.yaml index e9983576..5243b702 100644 --- a/target/nextflow/eset/create_fdata/.config.vsh.yaml +++ b/target/nextflow/eset/create_fdata/.config.vsh.yaml @@ -179,18 +179,35 @@ build_info: engine: "docker|native" output: "target/nextflow/eset/create_fdata" executable: "target/nextflow/eset/create_fdata/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. 
Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -202,11 +219,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/eset/create_fdata/main.nf b/target/nextflow/eset/create_fdata/main.nf index 5bf500c8..11f950bd 100644 --- a/target/nextflow/eset/create_fdata/main.nf +++ b/target/nextflow/eset/create_fdata/main.nf @@ -1,6 +1,6 @@ // create_fdata update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3278,14 +3273,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/eset/create_fdata", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3294,7 +3291,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3305,11 +3302,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3328,7 +3327,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import logging import pandas as pd @@ -3821,7 +3820,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3835,6 +3834,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = 
scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/eset/create_fdata/nextflow_schema.json b/target/nextflow/eset/create_fdata/nextflow_schema.json index cde29031..69cc44b1 100644 --- a/target/nextflow/eset/create_fdata/nextflow_schema.json +++ b/target/nextflow/eset/create_fdata/nextflow_schema.json @@ -27,10 +27,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited text file containing information about the \u0027gene\u0027 or \u0027transcript\u0027\nentries from the input GTF file", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited text file containing information about the \u0027gene\u0027 or \u0027transcript\u0027\nentries from the input GTF file. The \u0027transcript\u0027 entries are used in case the source\nof the GTF was \u0027refGene\u0027 or \u0027ncbiRefSeq\u0027. \n" + "description": "Type: `file`, default: `fData.$id.txt`. Tab-delimited text file containing information about the \u0027gene\u0027 or \u0027transcript\u0027\nentries from the input GTF file", + "help_text": "Type: `file`, default: `fData.$id.txt`. Tab-delimited text file containing information about the \u0027gene\u0027 or \u0027transcript\u0027\nentries from the input GTF file. The \u0027transcript\u0027 entries are used in case the source\nof the GTF was \u0027refGene\u0027 or \u0027ncbiRefSeq\u0027. 
\n" , - "default":"$id.$key.output.txt" + "default":"fData.$id.txt" } diff --git a/target/nextflow/eset/create_pdata/.config.vsh.yaml b/target/nextflow/eset/create_pdata/.config.vsh.yaml index 27c7ec3b..5f3fefa8 100644 --- a/target/nextflow/eset/create_pdata/.config.vsh.yaml +++ b/target/nextflow/eset/create_pdata/.config.vsh.yaml @@ -193,18 +193,35 @@ build_info: engine: "docker|native" output: "target/nextflow/eset/create_pdata" executable: "target/nextflow/eset/create_pdata/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. 
Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -216,11 +233,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/eset/create_pdata/main.nf b/target/nextflow/eset/create_pdata/main.nf index 85d7108b..25b71b56 100644 --- a/target/nextflow/eset/create_pdata/main.nf +++ b/target/nextflow/eset/create_pdata/main.nf @@ -1,6 +1,6 @@ // create_pdata update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3292,14 +3287,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/eset/create_pdata", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3308,7 +3305,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3319,11 +3316,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3342,7 +3341,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN from itertools import batched import pandas as pd @@ -3761,7 +3760,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3775,6 +3774,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = 
scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/eset/create_pdata/nextflow_schema.json b/target/nextflow/eset/create_pdata/nextflow_schema.json index e7a40906..85da40c1 100644 --- a/target/nextflow/eset/create_pdata/nextflow_schema.json +++ b/target/nextflow/eset/create_pdata/nextflow_schema.json @@ -37,10 +37,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. ", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. " + "description": "Type: `file`, default: `pData.$id.txt`. ", + "help_text": "Type: `file`, default: `pData.$id.txt`. 
" , - "default":"$id.$key.output.txt" + "default":"pData.$id.txt" } diff --git a/target/nextflow/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml b/target/nextflow/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml index aa694e66..a9478a0f 100644 --- a/target/nextflow/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml +++ b/target/nextflow/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml @@ -151,18 +151,35 @@ build_info: engine: "docker|native" output: "target/nextflow/integration_test_components/htrnaseq/check_eset" executable: "target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -174,11 +191,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf b/target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf index d0315d09..33e7fc4d 100644 --- a/target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf +++ b/target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf @@ -1,6 +1,6 @@ // check_eset update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. 
only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3232,14 +3227,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/integration_test_components/htrnaseq/check_eset", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. 
Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3248,7 +3245,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3259,11 +3256,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3282,7 +3281,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.R" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. 
@@ -3855,7 +3854,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3869,6 +3868,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml b/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml index d8f4d56d..f2bc31c2 100644 --- a/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml +++ b/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/.config.vsh.yaml @@ -160,18 +160,35 @@ build_info: engine: "docker|native" output: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output" executable: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -183,11 +200,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf b/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf index a1a5a3a2..c6d67913 100644 --- a/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf +++ b/target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf @@ -1,6 +1,6 @@ // check_cutadapt_output update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3243,14 +3238,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3259,7 +3256,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3270,11 +3267,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3293,7 +3292,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import dnaio from operator import itemgetter @@ -3735,7 +3734,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3749,6 +3748,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = 
scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/io/publish_fastqs/.config.vsh.yaml b/target/nextflow/io/publish_fastqs/.config.vsh.yaml index d8e56b12..355e5ae2 100644 --- a/target/nextflow/io/publish_fastqs/.config.vsh.yaml +++ b/target/nextflow/io/publish_fastqs/.config.vsh.yaml @@ -5,18 +5,8 @@ argument_groups: - name: "Input arguments" arguments: - type: "file" - name: "--input_r1" - description: "Directory to write R1 fastq data to" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" - - type: "file" - name: "--input_r2" - description: "Directory to write R2 fastq data to" + name: "--input" + description: "Directory to write fastq data to" info: null must_exist: true create_parent: true @@ -145,18 +135,35 @@ build_info: engine: "docker|native" output: "target/nextflow/io/publish_fastqs" executable: "target/nextflow/io/publish_fastqs/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to 
process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -168,11 +175,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/io/publish_fastqs/main.nf b/target/nextflow/io/publish_fastqs/main.nf index feea38fd..b022de7d 100644 --- a/target/nextflow/io/publish_fastqs/main.nf +++ b/target/nextflow/io/publish_fastqs/main.nf @@ -1,6 +1,6 @@ // publish_fastqs update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3044,19 +3039,8 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--input_r1", - "description" : "Directory to write R1 fastq data to", - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--input_r2", - "description" : "Directory to write R2 fastq data to", + "name" : "--input", + "description" : "Directory to write fastq data to", "must_exist" : true, "create_parent" : true, "required" : true, @@ -3217,14 +3201,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/io/publish_fastqs", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3233,7 +3219,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3244,11 +3230,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3271,8 +3259,7 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi ) -$( if [ ! 
-z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) @@ -3304,14 +3291,9 @@ mkdir -p "\\$par_output" && echo "\\$par_output created" echo echo "Copying files..." -IFS=";" read -ra input_r1 <<<\\$par_input_r1 -IFS=";" read -ra input_r2 <<<\\$par_input_r2 +IFS=";" read -ra input <<<\\$par_input -for i in "\\${input_r1[@]}"; do - cp -rL "\\$i" "\\$par_output/" -done - -for i in "\\${input_r2[@]}"; do +for i in "\\${input[@]}"; do cp -rL "\\$i" "\\$par_output/" done VIASHMAIN @@ -3648,7 +3630,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3662,6 +3644,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // 
try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/io/publish_fastqs/nextflow_schema.json b/target/nextflow/io/publish_fastqs/nextflow_schema.json index 4cc77c4b..f341fc18 100644 --- a/target/nextflow/io/publish_fastqs/nextflow_schema.json +++ b/target/nextflow/io/publish_fastqs/nextflow_schema.json @@ -14,21 +14,11 @@ "properties": { - "input_r1": { + "input": { "type": "string", - "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to", - "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to" - - } - - - , - "input_r2": { - "type": - "string", - "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to", - "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to" + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. 
Directory to write fastq data to", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write fastq data to" } @@ -47,10 +37,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. ", - "help_text": "Type: `file`, default: `$id.$key.output.output`. " + "description": "Type: `file`, default: `$id`. ", + "help_text": "Type: `file`, default: `$id`. " , - "default":"$id.$key.output.output" + "default":"$id" } diff --git a/target/nextflow/io/publish_results/.config.vsh.yaml b/target/nextflow/io/publish_results/.config.vsh.yaml index 3b2c4c52..b61a9760 100644 --- a/target/nextflow/io/publish_results/.config.vsh.yaml +++ b/target/nextflow/io/publish_results/.config.vsh.yaml @@ -189,18 +189,35 @@ build_info: engine: "docker|native" output: "target/nextflow/io/publish_results" executable: "target/nextflow/io/publish_results/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -212,11 +229,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/io/publish_results/main.nf b/target/nextflow/io/publish_results/main.nf index 716ef9b4..263303ab 100644 --- a/target/nextflow/io/publish_results/main.nf +++ b/target/nextflow/io/publish_results/main.nf @@ -1,6 +1,6 @@ // publish_results update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3266,14 +3261,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/io/publish_results", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3282,7 +3279,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3293,11 +3290,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3724,7 +3723,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3738,6 +3737,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with 
the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/io/publish_results/nextflow_schema.json b/target/nextflow/io/publish_results/nextflow_schema.json index 570c5e95..3e2af79f 100644 --- a/target/nextflow/io/publish_results/nextflow_schema.json +++ b/target/nextflow/io/publish_results/nextflow_schema.json @@ -97,10 +97,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.output`. ", - "help_text": "Type: `file`, default: `$id.$key.output.output`. " + "description": "Type: `file`, default: `$id`. ", + "help_text": "Type: `file`, default: `$id`. 
" , - "default":"$id.$key.output.output" + "default":"$id" } diff --git a/target/nextflow/parallel_map/.config.vsh.yaml b/target/nextflow/parallel_map/.config.vsh.yaml index a8996e25..5884846e 100644 --- a/target/nextflow/parallel_map/.config.vsh.yaml +++ b/target/nextflow/parallel_map/.config.vsh.yaml @@ -281,18 +281,35 @@ build_info: engine: "docker|native" output: "target/nextflow/parallel_map" executable: "target/nextflow/parallel_map/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. 
Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -304,11 +321,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/parallel_map/main.nf b/target/nextflow/parallel_map/main.nf index 2407451e..df4e849e 100644 --- a/target/nextflow/parallel_map/main.nf +++ b/target/nextflow/parallel_map/main.nf @@ -1,6 +1,6 @@ // parallel_map update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3378,14 +3373,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/parallel_map", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3394,7 +3391,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3405,11 +3402,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3566,7 +3565,8 @@ for barcode_index in "\\${!barcodes[@]}"; do fi done echo "Did not find FASTQ files files for well \\${well_id}! "\\\\ - "Make sure that the input files have the correct file name format." 
+ "Make sure that the input files have the correct file name format."\\\\ + "Input files: \\${input_r1[@]}" exit 1 done @@ -4119,7 +4119,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -4133,6 +4133,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/parallel_map/nextflow_schema.json b/target/nextflow/parallel_map/nextflow_schema.json index e87db781..ce9ea034 100644 --- a/target/nextflow/parallel_map/nextflow_schema.json +++ b/target/nextflow/parallel_map/nextflow_schema.json @@ -119,10 +119,10 @@ "output": { "type": "string", - "description": "Type: List of `file`, required, default: `$id.$key.output_*./*`, multiple_sep: `\";\"`. A list of output folders which are the result of using STAR to map each input FASTQ pair STAR to the reference genome", - "help_text": "Type: List of `file`, required, default: `$id.$key.output_*./*`, multiple_sep: `\";\"`. A list of output folders which are the result of using STAR to map each input FASTQ pair STAR to the reference genome.\nThe order of the items DO NOT match with the order of the entries in the barcodes FASTA file or the input FASTQ pairs. \n" + "description": "Type: List of `file`, required, default: `./*`, multiple_sep: `\";\"`. A list of output folders which are the result of using STAR to map each input FASTQ pair STAR to the reference genome", + "help_text": "Type: List of `file`, required, default: `./*`, multiple_sep: `\";\"`. A list of output folders which are the result of using STAR to map each input FASTQ pair STAR to the reference genome.\nThe order of the items DO NOT match with the order of the entries in the barcodes FASTA file or the input FASTQ pairs. \n" , - "default":"$id.$key.output_*./*" + "default":"./*" } @@ -130,10 +130,10 @@ "joblog": { "type": "string", - "description": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs", - "help_text": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs." + "description": "Type: `file`, default: `execution_log.txt`. 
Where to store the log file listing all the jobs", + "help_text": "Type: `file`, default: `execution_log.txt`. Where to store the log file listing all the jobs." , - "default":"$id.$key.joblog.txt" + "default":"execution_log.txt" } diff --git a/target/nextflow/report/create_report/.config.vsh.yaml b/target/nextflow/report/create_report/.config.vsh.yaml index c9a3a1f0..99899121 100644 --- a/target/nextflow/report/create_report/.config.vsh.yaml +++ b/target/nextflow/report/create_report/.config.vsh.yaml @@ -164,6 +164,12 @@ engines: - "procps" - "pandoc" interactive: false + - type: "r" + script: + - "install.packages(\"BiocManager\")" + - "BiocManager::install(version = \"3.21\", type = \"source\", checkBuilt = TRUE)" + bioc_force_install: false + warnings_as_errors: true - type: "r" cran: - "ggplot2" @@ -205,18 +211,35 @@ build_info: engine: "docker|native" output: "target/nextflow/report/create_report" executable: "target/nextflow/report/create_report/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -228,11 +251,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/report/create_report/main.nf b/target/nextflow/report/create_report/main.nf index 5c76ae7c..1046a58d 100644 --- a/target/nextflow/report/create_report/main.nf +++ b/target/nextflow/report/create_report/main.nf @@ -1,6 +1,6 @@ // create_report update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3262,6 +3257,15 @@ meta = [ ], "interactive" : false }, + { + "type" : "r", + "script" : [ + "install.packages(\\"BiocManager\\")", + "BiocManager::install(version = \\"3.21\\", type = \\"source\\", checkBuilt = TRUE)" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + }, { "type" : "r", "cran" : [ @@ -3313,14 +3317,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/report/create_report", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3329,7 +3335,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3340,11 +3346,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3363,7 +3371,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.R" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. 
@@ -3771,7 +3779,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3785,6 +3793,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/report/create_report/plateLayouts.R b/target/nextflow/report/create_report/plateLayouts.R index c0cd989d..a2909d4a 100755 --- a/target/nextflow/report/create_report/plateLayouts.R +++ b/target/nextflow/report/create_report/plateLayouts.R @@ -283,15 +283,31 @@ plateLayout <- function( if (is.null(colours)) { colours <- tryCatch({ - colorRamp2( + circlize::colorRamp2( breaks = breaks, colors = brewer.pal(length(breaks), "Purples") ) }, - error = function(cond) { - return(c("#9370DB", "white")) + error = function(cond){ + + message("Recomputed breaks for proper colour mapping") + + breakValues <- plateValues$values + breakValues[which(is.na(breakValues))] <- 0 + if (all(breakValues >= 0)) { + breaks <- computeBreaks(7, max(plateValues$values, na.rm = TRUE)) + } else { + breaks <- quantile(plateValues$values, probs = seq(0, 1, 0.125)) + } + + circlize::colorRamp2( + breaks = breaks, + colors = brewer.pal(length(breaks), "Purples") + ) + }) } + ht <- Heatmap( plateValues$values, column_title = mainTitle, column_title_side = "top", @@ -425,6 +441,7 @@ computeBreaks <- function(nBreaks, variable) { ) coefExp <- c(exp(coefSystem[1]), coefSystem[2]) breaks <- coefExp[1] * exp((1:(nBreaks - 1)) * coefExp[2]) + breaks <- unique(c(0, breaks)) } - return(c(0, breaks)) -} \ No newline at end of file + return(breaks) +} diff --git a/target/nextflow/stats/combine_star_logs/.config.vsh.yaml b/target/nextflow/stats/combine_star_logs/.config.vsh.yaml index 54dfeaed..3815fc75 100644 --- a/target/nextflow/stats/combine_star_logs/.config.vsh.yaml +++ b/target/nextflow/stats/combine_star_logs/.config.vsh.yaml @@ -200,18 +200,35 @@ build_info: engine: "docker|native" output: "target/nextflow/stats/combine_star_logs" executable: "target/nextflow/stats/combine_star_logs/main.nf" - viash_version: "0.9.2" - 
git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -223,11 +240,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/stats/combine_star_logs/main.nf b/target/nextflow/stats/combine_star_logs/main.nf index d7a4a151..12fa011b 100644 --- a/target/nextflow/stats/combine_star_logs/main.nf +++ b/target/nextflow/stats/combine_star_logs/main.nf @@ -1,6 +1,6 @@ // combine_star_logs update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3294,14 +3289,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/stats/combine_star_logs", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3310,7 +3307,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3321,11 +3318,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3344,7 +3343,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import logging import pandas as pd @@ -3926,7 +3925,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3940,6 +3939,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using 
the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/stats/combine_star_logs/nextflow_schema.json b/target/nextflow/stats/combine_star_logs/nextflow_schema.json index 72f77d8b..d8ae39a9 100644 --- a/target/nextflow/stats/combine_star_logs/nextflow_schema.json +++ b/target/nextflow/stats/combine_star_logs/nextflow_schema.json @@ -57,10 +57,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files. \n" + "description": "Type: `file`, default: `starLogs.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files", + "help_text": "Type: `file`, default: `starLogs.txt`. 
Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files. \n" , - "default":"$id.$key.output.txt" + "default":"starLogs.txt" } diff --git a/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml b/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml index 50029201..e9cdf997 100644 --- a/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml +++ b/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml @@ -184,18 +184,35 @@ build_info: engine: "docker|native" output: "target/nextflow/stats/generate_pool_statistics" executable: "target/nextflow/stats/generate_pool_statistics/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -207,11 +224,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/stats/generate_pool_statistics/main.nf b/target/nextflow/stats/generate_pool_statistics/main.nf index b9a043f0..7bf1edac 100644 --- a/target/nextflow/stats/generate_pool_statistics/main.nf +++ b/target/nextflow/stats/generate_pool_statistics/main.nf @@ -1,6 +1,6 @@ // generate_pool_statistics update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3278,14 +3273,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/stats/generate_pool_statistics", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3294,7 +3291,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3305,11 +3302,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3328,7 +3327,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import pandas as pd from pathlib import Path @@ -3781,7 +3780,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3795,6 +3794,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = 
scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml b/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml index 90128fa9..a95377f6 100644 --- a/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml +++ b/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml @@ -266,18 +266,35 @@ build_info: engine: "docker|native" output: "target/nextflow/stats/generate_well_statistics" executable: "target/nextflow/stats/generate_well_statistics/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -289,11 +306,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/stats/generate_well_statistics/main.nf b/target/nextflow/stats/generate_well_statistics/main.nf index 9425065d..80383217 100644 --- a/target/nextflow/stats/generate_well_statistics/main.nf +++ b/target/nextflow/stats/generate_well_statistics/main.nf @@ -1,6 +1,6 @@ // generate_well_statistics update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3373,14 +3368,16 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/stats/generate_well_statistics", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3389,7 +3386,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3400,11 +3397,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3423,7 +3422,7 @@ meta = [ // inner workflow hook def innerWorkflowFactory(args) { def rawScript = '''set -e -tempscript=".viash_script.sh" +tempscript=".viash_script.py" cat > "$tempscript" << VIASHMAIN import pysam import pandas as pd @@ -3867,7 +3866,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3881,6 +3880,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = 
scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names diff --git a/target/nextflow/stats/generate_well_statistics/nextflow_schema.json b/target/nextflow/stats/generate_well_statistics/nextflow_schema.json index 3e2b583d..a432968c 100644 --- a/target/nextflow/stats/generate_well_statistics/nextflow_schema.json +++ b/target/nextflow/stats/generate_well_statistics/nextflow_schema.json @@ -47,10 +47,10 @@ "processedBAMFile": { "type": "string", - "description": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a ", - "help_text": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromsome to which the read was mapped to.\n" + "description": "Type: `file`, default: `processedBamFile.txt`. Path to a ", + "help_text": "Type: `file`, default: `processedBamFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromsome to which the read was mapped to.\n" , - "default":"$id.$key.processedBAMFile.txt" + "default":"processedBamFile.txt" } @@ -58,10 +58,10 @@ "nrReadsNrGenesPerChrom": { "type": "string", - "description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. 
Path to an output file that contains a ", - "help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n" + "description": "Type: `file`, default: `nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a ", + "help_text": "Type: `file`, default: `nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n" , - "default":"$id.$key.nrReadsNrGenesPerChrom.txt" + "default":"nrReadsNrGenesPerChrom.txt" } @@ -69,10 +69,10 @@ "nrReadsNrUMIsPerCB": { "type": "string", - "description": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a ", - "help_text": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n" + "description": "Type: `file`, default: `nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a ", + "help_text": "Type: `file`, default: `nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n" , - "default":"$id.$key.nrReadsNrUMIsPerCB.txt" + "default":"nrReadsNrUMIsPerCB.txt" } @@ -80,10 +80,10 @@ "umiFreqTop": { "type": "string", - "description": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. 
Path to an output file that contains a ", - "help_text": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n" + "description": "Type: `file`, default: `umiFreqTop100.txt`. Path to an output file that contains a ", + "help_text": "Type: `file`, default: `umiFreqTop100.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n" , - "default":"$id.$key.umiFreqTop.txt" + "default":"umiFreqTop100.txt" } diff --git a/target/nextflow/utils/concatRuns/.config.vsh.yaml b/target/nextflow/utils/concatRuns/.config.vsh.yaml new file mode 100644 index 00000000..1f8df563 --- /dev/null +++ b/target/nextflow/utils/concatRuns/.config.vsh.yaml @@ -0,0 +1,212 @@ +name: "concatRuns" +namespace: "utils" +version: "update-resources" +argument_groups: +- name: "Arguments" + arguments: + - type: "file" + name: "--input_r1" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--input_r2" + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--sample_id" + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_r1" + description: "Path to read 1 fastq/fasta file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--output_r2" + description: "Path to read 2 fastq/fasta file" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" 
+resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Concatenate well FASTQ files from different runs in order to increase\ + \ sequencing depth.\n" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +requirements: + commands: + - "ps" +dependencies: +- name: "concat_text" + repository: + type: "vsh" + repo: "craftbox" + tag: "v0.1.0" +repositories: +- type: "vsh" + name: "cb" + repo: "craftbox" + tag: "v0.1.0" +license: "MIT" +links: + repository: "https://github.com/viash-hub/htrnaseq" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 
4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +- type: "native" + id: "native" +build_info: + config: "src/utils/concatRuns/config.vsh.yaml" + runner: "nextflow" + engine: "native|native" + output: "target/nextflow/utils/concatRuns" + executable: "target/nextflow/utils/concatRuns/main.nf" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" + git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" + dependencies: + - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/concat_text" +package_config: + name: "htrnaseq" + version: "update-resources" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" + info: + test_resources: + - path: "gs://viash-hub-resources/htrnaseq/v1" + dest: "resources_test" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\ + \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\ + \ dest: 'nextflow_labels.config'}\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'update-resources'" + keywords: + - "bioinformatics" + - "sequencing" + - "high-throughput" + - "RNAseq" + - "mapping" + - "counting" + - "pipeline" + - "workflow" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/htrnaseq" + issue_tracker: "https://github.com/viash-hub/htrnaseq/issues" diff --git a/target/nextflow/utils/concatRuns/main.nf b/target/nextflow/utils/concatRuns/main.nf new file mode 100644 index 00000000..cf5658f1 --- /dev/null +++ b/target/nextflow/utils/concatRuns/main.nf @@ -0,0 +1,3540 @@ +// concatRuns update-resources +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
////////////////////////////
// VDSL3 helper functions //
////////////////////////////

// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf'
/**
 * Raised when an argument value does not have (and cannot be given) the type
 * declared for it. Every constructor argument is also stored as a property so
 * that handlers can inspect the individual parts of the failure.
 */
class UnexpectedArgumentTypeException extends Exception {
  String errorIdentifier
  String stage
  String plainName
  String expectedClass
  String foundClass

  // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""}
  UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
    super(describeMismatch(errorIdentifier, stage, plainName, expectedClass, foundClass))
    this.errorIdentifier = errorIdentifier
    this.stage = stage
    this.plainName = plainName
    this.expectedClass = expectedClass
    this.foundClass = foundClass
  }

  // Builds the exception message; the identifier and the stage are left out when empty/null.
  private static String describeMismatch(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
    return "Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " +
      "Expected type: ${expectedClass}. Found type: ${foundClass}"
  }
}

/**
 * Checks if the given value is of the expected type. If not, an exception is thrown.
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? 
null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? 
null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + 
} + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. 
+ * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml
 *     file should be a list of maps, each of which has keys corresponding to the
 *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
 *     When passing a csv, json or yaml, relative path names are relativized to the
 *     location of the parameter file.
 *   - Combine the parameter sets into a vdsl3 Channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * 
 * @return A list of parameters with the first element of the event being
 *         the event ID and the second element containing a map of the parsed parameters.
 */
 
// NOTE(review): the generic parameters of the return type were garbled to `List>>`
// in this copy and have been reconstructed; confirm against the upstream viash helper.
private List<Tuple2<String, Map<String, String>>> _paramsToParamSets(Map params, Map config){
  // todo: fetch key from run args
  def key_ = config.name
  
  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  // only keep the params that are declared as arguments in the config
  def globalParams = config.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  // NOTE(review): on a plain Map, Groovy's two-argument get() also stores the
  // default under the key when it is absent - confirm this is harmless for `params`.
  def globalID = params.get("id", null)

  /* process params_list arguments */
  /*********************************/
  def paramList = params.containsKey("param_list") && params.param_list != null ?
    params.param_list : []
  // if (paramList instanceof String) {
  //   paramList = [paramList]
  // }
  // def paramSets = paramList.collectMany{ _parseParamList(it, config) }
  // TODO: be able to process param_list when it is a list of strings
  def paramSets = _parseParamList(paramList, config)
  if (paramSets.isEmpty()) {
    // no param_list: a single set that only carries the global parameters
    paramSets = [[null, [:]]]
  }

  /* combine arguments into channel */
  /**********************************/
  def processedParams = paramSets.indexed().collect{ index, tup ->
    // Process ID: the id of the set wins over the global id
    def id = tup[0] ?: globalID
  
    if (workflow.stubRun && !id) {
      // if stub run, explicitly add an id if missing
      id = "stub${index}"
    }
    assert id != null: "Each parameter set should have at least an 'id'"

    // Process params: values of the set override the global values
    def parValues = globalParams + tup[1]
    // // Remove parameters which are null, if the default is also null
    // parValues = parValues.collectEntries{paramName, paramValue ->
    //   parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
    //   if ( paramValue != null || parameterSettings.get("default", null) != null ) {
    //     [paramName, paramValue]
    //   }
    // }
    parValues = parValues.collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
      assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument"

      value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")

      [ name, value ]
    }

    [id, parValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}

/**
 * Parse nextflow parameters based on settings defined in a viash config 
 * and return a nextflow channel.
 * 
 * @param params Input parameters.
Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 * 
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *         first contains the ID of the event and as second element holds a parameter map.
 *       
 *
 */
def channelFromParams(Map params, Map config) {
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf'
/**
 * Filter a channel of events on the uniqueness of their id (first tuple element).
 *
 * @param args Options. `stopOnError` (default: true) decides what happens when an id
 *             is seen a second time: when true, `error` is called; when false,
 *             a warning is logged and the duplicate event is dropped.
 *
 * @return The result of applying `filter` with the uniqueness check.
 */
def checkUniqueIds(Map args) {
  // Fall back to true only when the option was not given. The previous
  // expression had the ternary branches swapped, which ignored an explicit
  // `stopOnError: false` and left the unset case as null.
  def stopOnError = args.stopOnError == null ? true : args.stopOnError

  def idChecker = new IDChecker()

  return filter { tup ->
    // observe() returns false when the id was seen before
    if (!idChecker.observe(tup[0])) {
      if (stopOnError) {
        error "Duplicate id: ${tup[0]}"
      } else {
        log.warn "Duplicate id: ${tup[0]}, removing duplicate entry"
        return false
      }
    }
    return true
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf'
// This helper file will be deprecated soon
// Script-level flag, so the deprecation warning is printed at most once.
preprocessInputsDeprecationWarningPrinted = false

// Prints the deprecation warning to stderr the first time it is called.
def preprocessInputsDeprecationWarning() {
  if (!preprocessInputsDeprecationWarningPrinted) {
    preprocessInputsDeprecationWarningPrinted = true
    System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.")
  }
}

/**
 * Generate a nextflow Workflow that allows processing a channel of 
 * Vdsl3 formatted events and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make 
 *      sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
 *
 * Deprecated: a warning pointing to runEach is logged on every call.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optionally drop events for which the filter closure is falsy
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optionally replace the id by a fixed String or by the result of a closure
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // derive the component input from the state; the original state is kept
      // as the element right after the new data
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map of component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list of keys that are copied under the same name
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // simplifyInput/simplifyOutput are always forced to false here,
      // overriding whatever was passed in args.auto
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // optionally merge the component output back into the old state
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            // map of state key -> output key
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          // the output and the old state are replaced by the new state
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch
      
      post_ch
    }

    // mix all results
    // NOTE(review): `out_chs.size` is a property access on a List rather than a
    // call to size(); verify that it evaluates as intended.
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 * 
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure, called with the id, the data and the component itself.
 *   Events for which it is falsy are not run, but are appended to the output unchanged.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optionally drop events for which the filter closure is falsy
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optionally replace the id by a fixed String or by the result of a closure
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // split the events into those that are run and those that bypass the component
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // derive the component input from the state; the original state is kept
      // as the element right after the new data
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map of component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list of keys that are copied under the same name
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // simplifyInput/simplifyOutput are always forced to false here,
      // overriding whatever was passed in args.auto
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // optionally merge the component output back into the old state
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            // map of state key -> output key
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          // the output and the old state are replaced by the new state
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch
      
      // events that bypassed the component are appended to its results
      def return_ch = post_ch
        | concat(chPassthrough)
      
      return_ch
    }

    // mix all results
    // NOTE(review): `out_chs.size` is a property access on a List rather than a
    // call to size(); verify that it evaluates as intended.
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
/**
 * Join sourceChannel to targetChannel
 * 
 * This function joins the sourceChannel to the targetChannel. 
 * However, each id in the targetChannel must be present in the
 * sourceChannel.
If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if 
(param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? 
+ "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == 
"arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. 
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + 
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? 
obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? 
+ publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + +
+// this assumes that the state contains no other values other than those specified in the config
+/**
+ * Create a sub-workflow that stores the state of every event as a yaml file.
+ *
+ * @param args  a Map with:
+ *                - config: the component config (required); its `allArguments`
+ *                  entries are read for `direction`, `type`, `multiple` and `plainName`
+ *                - key:    name substituted for `$key` in templates (defaults to `config.name`)
+ * @return the `publishStatesSimpleWf` workflow. It takes a channel of
+ *         `[id, state, origState]` tuples, turns each into
+ *         `[id, yamlBlob, yamlFilename]`, pipes that into `publishStatesProc`
+ *         and emits the input channel unchanged.
+ */
+def publishStatesByConfig(Map args) {
+  def config = args.get("config")
+  assert config != null : "publishStatesByConfig: config must be specified"
+
+  def key_ = args.get("key", config.name)
+  assert key_ != null : "publishStatesByConfig: key must be specified"
+
+  workflow publishStatesSimpleWf {
+    take: input_ch
+    main:
+      input_ch
+        | map { tup ->
+          def id_ = tup[0]
+          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
+          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
+
+          // Fill in the $id / ${id} / $key / ${key} placeholders of a template.
+          // String.replace() substitutes literally, so an id that contains '$'
+          // or '\' is not interpreted as a regex group reference or escape
+          // (which is what replaceAll() does with its replacement argument).
+          def fillTemplate = { template ->
+            template
+              .replace('$id', id_)
+              .replace('${id}', id_)
+              .replace('$key', key_)
+              .replace('${key}', key_)
+          }
+
+          // TODO: allow overriding the state.yaml template
+          // TODO TODO: if auto.publish == "state", add output_state as an argument
+          def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
+          def yamlFilename = fillTemplate(yamlTemplate)
+          def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
+
+          // Turn a destination filename into a Path; if the yaml file lives in a
+          // sub directory (i.e. the id contains a slash), make it relative to that directory.
+          def relativeToYamlDir = { destination ->
+            def destPath = java.nio.file.Paths.get(destination)
+            yamlDir != null ? yamlDir.relativize(destPath) : destPath
+          }
+
+          // the processed state is a list of [key, value] tuples, where
+          //   - key is a String
+          //   - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
+          //   - (key, value) are the tuples that will be saved to the state.yaml file
+          def processedState =
+            config.allArguments
+              .findAll { it.direction == "output" }
+              .collectMany { par ->
+                def plainName_ = par.plainName
+                // if the state does not contain the key, it's an
+                // optional argument for which the component did
+                // not generate any output
+                if (!state_.containsKey(plainName_)) {
+                  return []
+                }
+                def value = state_[plainName_]
+                // if the parameter is not a file, it should be stored
+                // in the state as-is, but is not something that needs
+                // to be copied from the source path to the dest path
+                if (par.type != "file") {
+                  return [[key: plainName_, value: value]]
+                }
+                // if the orig state does not contain this filename,
+                // it's an optional argument for which the user specified
+                // that it should not be returned as a state
+                if (!origState_.containsKey(plainName_)) {
+                  return []
+                }
+                def filenameTemplate = origState_[plainName_]
+                // if the parameter is multiple: true, fetch the template
+                if (par.multiple && filenameTemplate instanceof List) {
+                  filenameTemplate = filenameTemplate[0]
+                }
+                // instantiate the template
+                def filename = fillTemplate(filenameTemplate)
+                if (par.multiple) {
+                  // if the parameter is multiple: true, the filename
+                  // should contain a wildcard '*' that is replaced with
+                  // the index of the file
+                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
+                  def outputPerFile = value.withIndex().collect{ val, ix ->
+                    relativeToYamlDir(filename.replace("*", ix.toString()))
+                  }
+                  return [[key: plainName_, value: outputPerFile]]
+                }
+                return [[key: plainName_, value: relativeToYamlDir(filename)]]
+              }
+
+          def updatedState_ = processedState.collectEntries{[it.key, it.value]}
+
+          // convert state to yaml blob
+          def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
+
+          [id_, yamlBlob_, yamlFilename]
+        }
+        | publishStatesProc
+    emit: input_ch
+  }
+  return publishStatesSimpleWf
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List.
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} +
+// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf'
+/**
+ * Validate the keys of a directive map.
+ *
+ * @param map          the value to check; must be a Map
+ * @param expectedKeys the keys that are allowed to occur in `map`
+ * @param requiredKeys the keys that must occur in `map`
+ * @param mapName      name of the map, used in the error messages (may be empty or null)
+ *
+ * Fails with an assertion error when `map` is not a Map, when it contains a key
+ * that is not listed in `expectedKeys`, or when one of `requiredKeys` is absent.
+ */
+def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
+  // null-safe class lookup so that a null argument yields the assertion message
+  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map?.getClass()}"
+  map.forEach { key, val ->
+    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
+  }
+  requiredKeys.forEach { requiredKey ->
+    // report the key that is actually missing; no variable named `key` exists in this closure
+    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
+  }
+}
+
+// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} +
+/**
+ * Normalise the 'fromState' workflow argument.
+ *
+ * @param fromState null, a Closure, a Map[String, String] of `newkey: origkey`
+ *                  pairs, or a List[String] (treated as `[name: name]` pairs)
+ * @param key_      the module key, only used in error messages
+ * @param config_   the component config; `config_.allArguments` is read
+ * @return null when `fromState` is null, the closure itself when a closure was
+ *         passed, otherwise a closure that receives a tuple, reads the state
+ *         Map at index 1 and returns a new Map holding `newkey: state[origkey]`
+ *         for every `origkey` that is present in the state.
+ */
+def _processFromState(fromState, key_, config_) {
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  if (fromState == null) {
+    return null
+  }
+
+  // if fromState is a List, convert to map
+  if (fromState instanceof List) {
+    // check whether fromstate is a list[string]
+    assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings"
+    fromState = fromState.collectEntries{[it, it]}
+  }
+
+  // if fromState is a map, convert to closure
+  if (fromState instanceof Map) {
+    // check whether fromstate is a map[string, string]
+    assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings"
+    assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings"
+    def fromStateMap = fromState.clone()
+    // Use the config that was passed in (as _processToState does) instead of
+    // reaching for a variable named `meta`, which is not defined in this function.
+    // NOTE(review): other code in this file compares `direction` against the
+    // lowercase values "input" / "output". If that is the only spelling in use,
+    // this list is always empty and the `throw` below is never reached. The
+    // comparison is left unchanged to preserve behaviour - confirm the intent.
+    def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName}
+    // turn the map into a closure to be used later on
+    fromState = { it ->
+      def state = it[1]
+      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
+      def data = fromStateMap.collectMany{newkey, origkey ->
+        // check whether newkey corresponds to a required argument
+        if (state.containsKey(origkey)) {
+          [[newkey, state[origkey]]]
+        } else if (!requiredInputNames.contains(origkey)) {
+          []
+        } else {
+          throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state")
+        }
+      }.collectEntries()
+      data
+    }
+  }
+
+  return fromState
+}
+
+/**
+ * Normalise the 'toState' workflow argument into a closure.
+ *
+ * @param toState  null, a Closure, a Map[String, String] of `newkey: origkey`
+ *                 pairs, or a List[String] (treated as `[name: name]` pairs)
+ * @param key_     the module key, only used in error messages
+ * @param config_  the component config; `config_.allArguments` is read
+ * @return a closure. When `toState` is null the closure returns element 1 of
+ *         its argument. When a Map or List was given, the closure reads the
+ *         output Map at index 1 and the state Map at index 2 and returns the
+ *         state extended with `newkey: output[origkey]` for every `origkey`
+ *         present in the output.
+ */
+def _processToState(toState, key_, config_) {
+  if (toState == null) {
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
+    "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"
+
+  // if toState is a List, convert to map
+  if (toState instanceof List) {
+    // check whether toState is a list[string]
+    assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings"
+    toState = toState.collectEntries{[it, it]}
+  }
+
+  // if toState is a map, convert to closure
+  if (toState instanceof Map) {
+    // check whether toState is a map[string, string]
+    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings"
+    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings"
+    def toStateMap = toState.clone()
+    // NOTE(review): `direction` is compared against lowercase "output" elsewhere
+    // in this file; with the capitalised "Output" this list is presumably always
+    // empty, which makes the `throw` below unreachable - confirm the intent.
+    def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName}
+    // turn the map into a closure to be used later on
+    toState = { it ->
+      def output = it[1]
+      def state = it[2]
+      assert output instanceof Map : "Error in module '$key_': the output is not a Map"
+      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
+      def extraEntries = toStateMap.collectMany{newkey, origkey ->
+        // check whether newkey corresponds to a required argument
+        if (output.containsKey(origkey)) {
+          [[newkey, output[origkey]]]
+        } else if (!requiredOutputNames.contains(origkey)) {
+          []
+        } else {
+          throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output")
+        }
+      }.collectEntries()
+      state + extraEntries
+    }
+  }
+
+  return toState
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf'
+/**
+ * Optional debugging step for use in a channel pipeline.
+ *
+ * @param workflowArgs the processed workflow arguments; `debug` and `key` are read
+ * @param debugKey     label that is included in the printed message
+ * @return the result of `view { ... }` (printing every tuple) when
+ *         `workflowArgs.debug` is truthy, otherwise the result of `map { it }`,
+ *         which passes every tuple through unchanged.
+ */
+def _debug(workflowArgs, debugKey) {
+  if (workflowArgs.debug) {
+    view { "process '${workflowArgs.key}' $debugKey tuple: $it" }
+  } else {
+    map { it }
+  }
+}
+
+// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List.
Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? 
+ chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] 
+ // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "concatRuns", + "namespace" : "utils", + "version" : "update-resources", + "argument_groups" : [ + { + "name" : "Arguments", + "arguments" : [ + { + "type" : "file", + "name" : "--input_r1", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_r2", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sample_id", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_r1", + "description" : "Path to read 1 fastq/fasta file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : 
";" + }, + { + "type" : "file", + "name" : "--output_r2", + "description" : "Path to read 2 fastq/fasta file", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/config/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n", + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "requirements" : { + "commands" : [ + "ps" + ] + }, + "dependencies" : [ + { + "name" : "concat_text", + "repository" : { + "type" : "vsh", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "cb", + "repo" : "craftbox", + "tag" : "v0.1.0" + } + ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/viash-hub/htrnaseq" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 
100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/utils/concatRuns/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native|native", + "output" : "target/nextflow/utils/concatRuns", + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" + }, + "package_config" : { + "name" : "htrnaseq", + "version" : "update-resources", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + 
"description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", + "info" : { + "test_resources" : [ + { + "path" : "gs://viash-hub-resources/htrnaseq/v1", + "dest" : "resources_test" + } + ] + }, + "viash_version" : "0.9.4", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'\n.resources += {path: '/src/config/labels.config', dest: 'nextflow_labels.config'}\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'update-resources'" + ], + "keywords" : [ + "bioinformatics", + "sequencing", + "high-throughput", + "RNAseq", + "mapping", + "counting", + "pipeline", + "workflow" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/htrnaseq", + "issue_tracker" : "https://github.com/viash-hub/htrnaseq/issues" + } + } +}''')) +] + +// resolve dependencies (if any) +meta["root_dir"] = getRootDir() +include { concat_text } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/concat_text/main.nf" + +// inner workflow: merges the per-well FASTQ files of all [id, state] events that share a `sample_id` into one event per sample (emitted as `output_r1`, `output_r2` and `_meta.join_id`) +// user-provided Nextflow code +workflow run_wf { + + take: + input_ch + + main: + // Count the number of input events per sample + // Results from events with the same sample ID need to be concatenated. 
+ event_counts_ch = input_ch + | map {id, state -> + def new_state = state + ["event_id": id] + def new_event = [state.sample_id, new_state] + return new_event + } + | groupTuple(by: 0) + | flatMap { id, states -> + def orig_event_ids = states.collect{it.event_id} + def new_events = orig_event_ids.collect{ orig_event_id -> + [orig_event_id, ["n_events": states.size()]] + } + return new_events + } + + + // The number of events per sample is passed to `groupTuple()` (as the size of the `groupKey`) + // so that it can emit the sample as soon as it is ready. This makes sure + // that the samples are processed asynchronously. + output_ch = input_ch.join(event_counts_ch) + | flatMap {id, state_demultiplex, state_event_counts -> + assert state_demultiplex.input_r1.size() == state_demultiplex.input_r2.size(), + "Expected output from well demultiplexing to contain equal amount of forward and reverse FASTQ files." + def new_states = [state_demultiplex.input_r1, state_demultiplex.input_r2].transpose().collect{ fastq_files -> + def (r1_file, r2_file) = fastq_files + def regex = ~/^(\w+)_R[12]{1}_001\.fastq(\.gz)?$/ + def parsed_file_name = r1_file.name =~ regex + def parsed_file_name_r2 = r2_file.name =~ regex + def well_id = parsed_file_name ? parsed_file_name[0][1] : null + def well_id_r2 = parsed_file_name_r2 ? parsed_file_name_r2[0][1] : null + // A file name that does not follow the pattern leaves its well ID null (indexing a matcher without a match would throw instead). + assert well_id && (well_id == well_id_r2), "Could not derive one shared well ID from FASTQ files '${r1_file.name}' and '${r2_file.name}'; expected names like <well>_R1_001.fastq(.gz) and <well>_R2_001.fastq(.gz)." 
+ def new_state = state_demultiplex + [ + "input_r1": r1_file, + "input_r2": r2_file, + "event_id": id, + ] + def group_settings = groupKey("${state_demultiplex.sample_id}_${well_id}", state_event_counts.n_events) + return [group_settings, new_state] + + } + return new_states + } + | groupTuple(by: 0, sort: "hash", remainder: true) + | map {group_settings, sample_states -> + def input_r1 = sample_states.collect{it.input_r1}.flatten() + def input_r2 = sample_states.collect{it.input_r2}.flatten() + def event_ids = sample_states.collect{it.event_id} + def sample_id_list = sample_states.collect{it.sample_id}.unique() + assert sample_id_list.size() == 
1 + def sample_id = sample_id_list[0] + assert input_r1.size() == input_r2.size() + + def new_state = [ + "input_r1": input_r1, + "input_r2": input_r2, + "event_id": event_ids, + "sample_id": sample_id, + ] + return [group_settings.target, new_state] + } + | concat_text.run( + directives: [label: ["lowmem", "lowcpu"]], + key: "concat_samples_r1", + runIf: {id, state -> state.input_r1.size() > 1}, + fromState: { id, state -> + def output_file_name = state.input_r1[0].name + [ + input: state.input_r1, + gzip_output: false, + output: output_file_name + ] + }, + toState: { id, result, state -> + def newState = state + [ input_r1: [ result.output ] ] + return newState + } + ) + | concat_text.run( + directives: [label: ["lowmem", "lowcpu"]], + key: "concat_samples_r2", + runIf: {id, state -> state.input_r2.size() > 1}, + fromState: { id, state -> + def output_file_name = state.input_r2[0].name + [ + input: state.input_r2, + gzip_output: false, + output: output_file_name + ] + }, + toState: { id, result, state -> + def newState = state + [ input_r2: [ result.output ] ] + return newState + } + ) + | map {id, state -> + def new_state = [state.sample_id, state] + return new_state + } + | groupTuple(by: 0, sort: 'hash') + | map {id, states -> + def new_state = [ + "input_r1": states.collect{it.input_r1}.flatten(), + "input_r2": states.collect{it.input_r2}.flatten(), + "_meta": ["join_id": states[0].event_id[0]] + ] + return [id, new_state] + } + | setState( + [ + "output_r1": "input_r1", + "output_r2": "input_r2", + "_meta": "_meta" + ] + ) + + emit: + output_ch + +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + 
"simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. 
+ // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: output.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // operate on copies made with deepClone() rather than on meta.config / params themselves + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ 
publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff 
--git a/target/nextflow/utils/concatRuns/nextflow.config b/target/nextflow/utils/concatRuns/nextflow.config new file mode 100644 index 00000000..2babf1da --- /dev/null +++ b/target/nextflow/utils/concatRuns/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'utils/concatRuns' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'update-resources' + description = 'Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: 
mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + 
withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/utils/concatRuns/nextflow_labels.config b/target/nextflow/utils/concatRuns/nextflow_labels.config new file mode 100644 index 00000000..2821ec46 --- /dev/null +++ b/target/nextflow/utils/concatRuns/nextflow_labels.config @@ -0,0 +1,108 @@ +executor { + $k8s { + submitRateLimit = '10sec' + pollInterval = '1 sec' + } +} + +process { + container = 'nextflow/bash:latest' + + // default resources + memory = { 8.Gb * task.attempt } + cpus = 8 + maxForks = 36 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = 192.GB + + // Resource labels + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 8 } + withLabel: midcpu { cpus = 16 } + withLabel: highcpu { cpus = 32 } + + withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } } + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } } + +} + +profiles { + // detect tempdir + tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' + ).toAbsolutePath() + + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + + docker { + docker.fixOwnership = true + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + + local { + // This config is for local processing. 
+ process { + withName: ".*parallel_map_process" { + maxForks = 1 + } + maxMemory = 25.GB + withLabel: verylowcpu { cpus = 2 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 6 } + withLabel: highcpu { cpus = 8 } + + withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } } + } + } +} +// Returns `to_compare` capped at process.maxMemory (process.maxMemory itself once task.attempt equals process.maxRetries); without a maxMemory the request is returned unchanged. +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + // Any failure while evaluating the limits below is reported and stops the run with exit code 1. + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) { + return process.maxMemory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } + } diff --git a/target/nextflow/utils/concatRuns/nextflow_schema.json b/target/nextflow/utils/concatRuns/nextflow_schema.json new file mode 100644 index 00000000..e39d0478 --- /dev/null +++ b/target/nextflow/utils/concatRuns/nextflow_schema.json @@ -0,0 +1,112 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "concatRuns", +"description": "Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n", +"type": "object", +"definitions": { + + + + "arguments" : { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + + + "input_r1": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. 
" + + } + + + , + "input_r2": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. ", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. " + + } + + + , + "sample_id": { + "type": + "string", + "description": "Type: `string`, required. ", + "help_text": "Type: `string`, required. " + + } + + + , + "output_r1": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file", + "help_text": "Type: List of `file`, default: `$id.$key.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file" + , + "default":"$id.$key.output_r1_*" + } + + + , + "output_r2": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file", + "help_text": "Type: List of `file`, default: `$id.$key.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file" + , + "default":"$id.$key.output_r2_*" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/arguments" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/utils/listInputDir/.config.vsh.yaml b/target/nextflow/utils/listInputDir/.config.vsh.yaml index def49047..07c773c1 100644 --- a/target/nextflow/utils/listInputDir/.config.vsh.yaml +++ b/target/nextflow/utils/listInputDir/.config.vsh.yaml @@ -167,18 +167,35 @@ build_info: engine: "native|native" output: "target/nextflow/utils/listInputDir" executable: "target/nextflow/utils/listInputDir/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -190,11 +207,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/utils/listInputDir/main.nf b/target/nextflow/utils/listInputDir/main.nf index cdc71be9..15dfdf0d 100644 --- a/target/nextflow/utils/listInputDir/main.nf +++ b/target/nextflow/utils/listInputDir/main.nf @@ -1,6 +1,6 @@ // listInputDir update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3238,14 +3233,16 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/utils/listInputDir", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3254,7 +3251,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3265,11 +3262,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", diff --git a/target/nextflow/utils/listInputDir/nextflow_schema.json b/target/nextflow/utils/listInputDir/nextflow_schema.json index 0e90a1d0..ee2f29c9 100644 --- a/target/nextflow/utils/listInputDir/nextflow_schema.json +++ b/target/nextflow/utils/listInputDir/nextflow_schema.json @@ -38,10 +38,10 @@ "r1_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.r1_output.r1_output`. Path to read 1 fastq/fasta file", - "help_text": "Type: `file`, default: `$id.$key.r1_output.r1_output`. Path to read 1 fastq/fasta file" + "description": "Type: `file`, default: `$id.$key.r1_output`. Path to read 1 fastq/fasta file", + "help_text": "Type: `file`, default: `$id.$key.r1_output`. Path to read 1 fastq/fasta file" , - "default":"$id.$key.r1_output.r1_output" + "default":"$id.$key.r1_output" } @@ -49,10 +49,10 @@ "r2_output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.r2_output.r2_output`. 
Path to read 2 fastq/fasta file", - "help_text": "Type: `file`, default: `$id.$key.r2_output.r2_output`. Path to read 2 fastq/fasta file" + "description": "Type: `file`, default: `$id.$key.r2_output`. Path to read 2 fastq/fasta file", + "help_text": "Type: `file`, default: `$id.$key.r2_output`. Path to read 2 fastq/fasta file" , - "default":"$id.$key.r2_output.r2_output" + "default":"$id.$key.r2_output" } diff --git a/target/nextflow/workflows/htrnaseq/.config.vsh.yaml b/target/nextflow/workflows/htrnaseq/.config.vsh.yaml index 6a92f13f..b4b06b46 100644 --- a/target/nextflow/workflows/htrnaseq/.config.vsh.yaml +++ b/target/nextflow/workflows/htrnaseq/.config.vsh.yaml @@ -20,9 +20,9 @@ argument_groups: arguments: - type: "file" name: "--input_r1" - description: "Forward reads in FASTQ format. Multiple files can be provided which\ - \ will\nbe demultiplexed separately before joining the results for each individual\ - \ well.\n" + description: "Forward reads in FASTQ format. Multiple files corresponding to different\ + \ lanes can be provided which will\nbe demultiplexed separately before joining\ + \ the results for each individual well.\n" info: null must_exist: true create_parent: true @@ -32,9 +32,9 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--input_r2" - description: "Reverse reads in FASTQ format. Multiple files can be provided which\ - \ will\nbe demultiplexed separately before joining the results for each individual\ - \ well.\n" + description: "Reverse reads in FASTQ format. Multiple files corresponding to different\ + \ lanes can be provided which will\nbe demultiplexed separately before joining\ + \ the results for each individual well.\n" info: null must_exist: true create_parent: true @@ -80,26 +80,25 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" + - type: "string" + name: "--sample_id" + description: "Sample ID for the provided input files. If not provided, the value\ + \ of --id\nwill be used. 
Input files will always be demultiplexed separately,\n\ + but the FASTQs for wells with matching sample IDs will be concatenated before\ + \ mapping.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" - name: "Output arguments" arguments: - type: "file" - name: "--fastq_output_r1" - description: "List of demultiplexed fastq files" + name: "--fastq_output" + description: "Directory containing output fastq files" info: null default: - - "fastq/*_R1_001.fastq" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: true - multiple_sep: ";" - - type: "file" - name: "--fastq_output_r2" - description: "List of demultiplexed fastq files" - info: null - default: - - "fastq/*_R2_001.fastq" + - "fastq/*" must_exist: true create_parent: true required: true @@ -240,6 +239,9 @@ dependencies: - name: "report/create_report" repository: type: "local" +- name: "utils/concatRuns" + repository: + type: "local" repositories: - type: "local" name: "local" @@ -325,9 +327,10 @@ build_info: engine: "native|native" output: "target/nextflow/workflows/htrnaseq" executable: "target/nextflow/workflows/htrnaseq/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" dependencies: - "target/nextflow/stats/combine_star_logs" - "target/nextflow/stats/generate_pool_statistics" @@ -339,15 +342,32 @@ build_info: - "target/nextflow/eset/create_fdata" - "target/nextflow/eset/create_pdata" - "target/nextflow/report/create_report" + - "target/nextflow/utils/concatRuns" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + 
\ where every\nwell of a microarray plate is a sample. A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -359,11 +379,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/workflows/htrnaseq/main.nf b/target/nextflow/workflows/htrnaseq/main.nf index ae5c5204..9c916bb4 100644 --- a/target/nextflow/workflows/htrnaseq/main.nf +++ b/target/nextflow/workflows/htrnaseq/main.nf @@ -1,6 +1,6 @@ // htrnaseq update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3071,7 +3066,7 @@ meta = [ { "type" : "file", "name" : "--input_r1", - "description" : "Forward reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n", + "description" : "Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n", "must_exist" : true, "create_parent" : true, "required" : true, @@ -3082,7 +3077,7 @@ meta = [ { "type" : "file", "name" : "--input_r2", - "description" : "Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n", + "description" : "Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n", "must_exist" : true, "create_parent" : true, "required" : true, @@ -3132,6 +3127,15 @@ meta = [ "direction" : "input", "multiple" : false, "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--sample_id", + "description" : "Sample ID for the provided input files. If not provided, the value of --id\nwill be used. 
Input files will always be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" } ] }, @@ -3140,24 +3144,10 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--fastq_output_r1", - "description" : "List of demultiplexed fastq files", + "name" : "--fastq_output", + "description" : "Directory containing output fastq files", "default" : [ - "fastq/*_R1_001.fastq" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "output", - "multiple" : true, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--fastq_output_r2", - "description" : "List of demultiplexed fastq files", - "default" : [ - "fastq/*_R2_001.fastq" + "fastq/*" ], "must_exist" : true, "create_parent" : true, @@ -3358,6 +3348,12 @@ meta = [ "repository" : { "type" : "local" } + }, + { + "name" : "utils/concatRuns", + "repository" : { + "type" : "local" + } } ], "repositories" : [ @@ -3463,14 +3459,16 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/workflows/htrnaseq", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3479,7 +3477,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3490,11 +3488,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3518,39 +3518,123 @@ include { create_eset } from "${meta.resources_dir}/../../../nextflow/eset/creat include { create_fdata } from "${meta.resources_dir}/../../../nextflow/eset/create_fdata/main.nf" include { create_pdata } from "${meta.resources_dir}/../../../nextflow/eset/create_pdata/main.nf" include { create_report } from "${meta.resources_dir}/../../../nextflow/report/create_report/main.nf" +include { concatRuns } from "${meta.resources_dir}/../../../nextflow/utils/concatRuns/main.nf" // inner workflow // user-provided Nextflow code workflow run_wf { take: - input_ch + raw_ch main: + input_ch = raw_ch + // Use the event ID as the default for the sample ID + | map {id, state -> + def sample_id = state.sample_id ?: id + def newState = state + ["sample_id": sample_id, "run_id": id] + return [id, newState] + } + // The featureData only has one requirement: the genome annotation. - // It can be generated straight away. + // It can be generated straight away. Most of the time, there is one shared + // annotation for all of the inputs and the fData should only be calculated once. + // The state is manipulated in such a way that there is one event created per unique + // input annotation file. In turn, the featureData file can be joined into the original input + // channel which allows it to be shared across events if required. 
f_data_ch = input_ch + | toSortedList() + | flatMap {ids_and_states -> + def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state -> + def (id, state) = id_and_state + def annotation_file = state.annotation + def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])] + return new_state + } + def file_names = annotation_files.keySet().collect{it.name} + assert (file_names.toSet().size() == file_names.size()), + "Please make sure that the annotation files have unique file names." + def new_states = annotation_files.collect{annotation_file, value -> + def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]] + return new_state + } + return new_states + } | create_fdata.run( directives: [label: ["lowmem", "lowcpu"]], fromState: [ "gtf": "annotation", "output": "f_data" ], - toState: {id, result, state -> ["f_data": result.output]} + toState: ["f_data": "output"] ) + | flatMap {_, state -> + def new_states = state.event_ids.collect{event_id -> + [event_id, ["f_data": state.f_data]] + } + return new_states + } // Perform mapping of each well. - mapping_ch = input_ch + demultiplex_ch = input_ch | well_demultiplex.run( fromState: [ "input_r1": "input_r1", "input_r2": "input_r2", "barcodesFasta": "barcodesFasta", ], - toState: [ - "input_r1": "output_r1", - "input_r2": "output_r2", - ] + toState: {id, result, state -> + def all_fastq = result.output_r1 + result.output_r2 + def output_dir = all_fastq.collect{it.parent}.unique() + assert output_dir.size() == 1, "Expected output from well demultiplexing to reside into one directory." 
+ def new_state = state + [ + "input_r1": result.output_r1, + "input_r2": result.output_r2, + "fastq_output_directory": output_dir[0], + ] + return new_state + } ) + + fastq_output_directory_ch = demultiplex_ch + | map {id, state -> + def new_event = [state.sample_id, state] + return new_event + } + | groupTuple(by: 0, sort: "hash") + | map {id, states -> + def fastq_output_dirs = states.collect{it.fastq_output_directory} + def new_state = ["fastq_output_directory": fastq_output_dirs] + def new_event = [id, new_state] + return [id, new_state] + } + + + concat_samples_ch = demultiplex_ch.join(f_data_ch) + | map {id, demutliplex_state, f_data_state -> + def newState = demutliplex_state + ["f_data": f_data_state["f_data"]] + [id, newState] + } + | concatRuns.run( + fromState: [ + "input_r1": "input_r1", + "input_r2": "input_r2", + "sample_id": "sample_id", + ], + toState: {id, result, state -> + def state_overwite = [ + "input_r1": result.output_r1, + "input_r2": result.output_r2, + "_meta": ["join_id": state.run_id] + ] + return state + state_overwite + } + ) + + pool_ch = concat_samples_ch.join(fastq_output_directory_ch) + | map {id, demux_state, fastq_output_directory_state -> + def new_state = demux_state + fastq_output_directory_state + return [id, new_state] + } | parallel_map.run( directives: ["label": ["highmem", "lowcpu"]], fromState: {id, state -> @@ -3567,9 +3651,6 @@ workflow run_wf { "star_output": "output", ] ) - - // From the mapped wells, create statistics based on the BAM files. - pool_ch = mapping_ch // Split the events from 1 event per pool into events per well // and add extra metadata about the wells to the state. 
| well_metadata.run( @@ -3690,7 +3771,7 @@ workflow run_wf { ] ) - p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true) + eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true) | map {id, star_logs_state, pool_statistics_state -> def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool] return [id, newState] @@ -3704,12 +3785,6 @@ workflow run_wf { ], toState: ["p_data": "output"], ) - - eset_ch = p_data_ch.join(f_data_ch, remainder: true) - | map {id, p_data_state, f_data_state -> - def newState = p_data_state + ["f_data": f_data_state["f_data"]] - [id, newState] - } | create_eset.run( directives: [label: ["lowmem", "lowcpu"]], fromState: [ @@ -3751,13 +3826,14 @@ workflow run_wf { output_ch = eset_ch.join(report_channel) | map {id, state_eset, state_report -> - def new_state = state_eset + ["html_report": state_report.html_report] + def new_state = state_eset + [ + "html_report": state_report.html_report, + ] [id, new_state] } | setState([ - "star_output": "star_output", - "fastq_output_r1": "input_r1", - "fastq_output_r2": "input_r2", + "star_output": "star_output", + "fastq_output": "fastq_output_directory", "star_output": "star_output", "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool", "star_qc_metrics": "star_qc_metrics", @@ -3765,6 +3841,7 @@ workflow run_wf { "f_data": "f_data", "p_data": "p_data", "html_report": "html_report", + "_meta": "_meta", ]) diff --git a/target/nextflow/workflows/htrnaseq/nextflow_schema.json b/target/nextflow/workflows/htrnaseq/nextflow_schema.json index 760e4673..82dfb8cb 100644 --- a/target/nextflow/workflows/htrnaseq/nextflow_schema.json +++ b/target/nextflow/workflows/htrnaseq/nextflow_schema.json @@ -18,7 +18,7 @@ "type": "string", "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format", - "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. 
Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n" + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n" } @@ -28,7 +28,7 @@ "type": "string", "description": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format", - "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n" + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n" } @@ -74,6 +74,16 @@ } + , + "sample_id": { + "type": + "string", + "description": "Type: `string`. Sample ID for the provided input files", + "help_text": "Type: `string`. Sample ID for the provided input files. If not provided, the value of --id\nwill be used. Input files will always be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n" + + } + + } }, @@ -85,24 +95,13 @@ "properties": { - "fastq_output_r1": { + "fastq_output": { "type": "string", - "description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files", - "help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" + "description": "Type: List of `file`, required, default: `fastq/*`, multiple_sep: `\";\"`. 
Directory containing output fastq files", + "help_text": "Type: List of `file`, required, default: `fastq/*`, multiple_sep: `\";\"`. Directory containing output fastq files" , - "default":"$id.$key.fastq_output_r1_*.fastq" - } - - - , - "fastq_output_r2": { - "type": - "string", - "description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files", - "help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" - , - "default":"$id.$key.fastq_output_r2_*.fastq" + "default":"fastq/*" } @@ -110,10 +109,10 @@ "star_output": { "type": "string", - "description": "Type: List of `file`, required, default: `$id.$key.star_output_*.$id/*`, multiple_sep: `\";\"`. Output from mapping with STAR", - "help_text": "Type: List of `file`, required, default: `$id.$key.star_output_*.$id/*`, multiple_sep: `\";\"`. Output from mapping with STAR" + "description": "Type: List of `file`, required, default: `star.$id/*`, multiple_sep: `\";\"`. Output from mapping with STAR", + "help_text": "Type: List of `file`, required, default: `star.$id/*`, multiple_sep: `\";\"`. Output from mapping with STAR" , - "default":"$id.$key.star_output_*.$id/*" + "default":"star.$id/*" } @@ -121,10 +120,10 @@ "nrReadsNrGenesPerChrom": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. ", - "help_text": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. " + "description": "Type: `file`, required, default: `nrReadsNrGenesPerChrom.$id.txt`. ", + "help_text": "Type: `file`, required, default: `nrReadsNrGenesPerChrom.$id.txt`. " , - "default":"$id.$key.nrReadsNrGenesPerChrom.txt" + "default":"nrReadsNrGenesPerChrom.$id.txt" } @@ -132,10 +131,10 @@ "star_qc_metrics": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.star_qc_metrics.txt`. 
", - "help_text": "Type: `file`, required, default: `$id.$key.star_qc_metrics.txt`. " + "description": "Type: `file`, required, default: `starLogs.$id.txt`. ", + "help_text": "Type: `file`, required, default: `starLogs.$id.txt`. " , - "default":"$id.$key.star_qc_metrics.txt" + "default":"starLogs.$id.txt" } @@ -143,10 +142,10 @@ "eset": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.eset.rds`. ", - "help_text": "Type: `file`, required, default: `$id.$key.eset.rds`. " + "description": "Type: `file`, required, default: `eset.$id.rds`. ", + "help_text": "Type: `file`, required, default: `eset.$id.rds`. " , - "default":"$id.$key.eset.rds" + "default":"eset.$id.rds" } @@ -154,10 +153,10 @@ "f_data": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.f_data.tsv`. ", - "help_text": "Type: `file`, required, default: `$id.$key.f_data.tsv`. " + "description": "Type: `file`, required, default: `fData.$id.tsv`. ", + "help_text": "Type: `file`, required, default: `fData.$id.tsv`. " , - "default":"$id.$key.f_data.tsv" + "default":"fData.$id.tsv" } @@ -165,10 +164,10 @@ "p_data": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.p_data.tsv`. ", - "help_text": "Type: `file`, required, default: `$id.$key.p_data.tsv`. " + "description": "Type: `file`, required, default: `pData.$id.tsv`. ", + "help_text": "Type: `file`, required, default: `pData.$id.tsv`. " , - "default":"$id.$key.p_data.tsv" + "default":"pData.$id.tsv" } @@ -176,10 +175,10 @@ "html_report": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.html_report.html`. ", - "help_text": "Type: `file`, required, default: `$id.$key.html_report.html`. " + "description": "Type: `file`, required, default: `report.$id.html`. ", + "help_text": "Type: `file`, required, default: `report.$id.html`. 
" , - "default":"$id.$key.html_report.html" + "default":"report.$id.html" } diff --git a/target/nextflow/workflows/runner/.config.vsh.yaml b/target/nextflow/workflows/runner/.config.vsh.yaml index 44be24aa..c75bff00 100644 --- a/target/nextflow/workflows/runner/.config.vsh.yaml +++ b/target/nextflow/workflows/runner/.config.vsh.yaml @@ -12,7 +12,7 @@ argument_groups: create_parent: true required: true direction: "input" - multiple: true + multiple: false multiple_sep: ";" - type: "file" name: "--barcodesFasta" @@ -220,9 +220,10 @@ build_info: engine: "native|native" output: "target/nextflow/workflows/runner" executable: "target/nextflow/workflows/runner/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" dependencies: - "target/nextflow/utils/listInputDir" - "target/nextflow/workflows/htrnaseq" @@ -231,12 +232,28 @@ build_info: package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -248,11 +265,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/workflows/runner/main.nf b/target/nextflow/workflows/runner/main.nf index fc08d0bc..8d26d5ce 100644 --- a/target/nextflow/workflows/runner/main.nf +++ b/target/nextflow/workflows/runner/main.nf @@ -1,6 +1,6 @@ // runner update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3050,7 +3045,7 @@ meta = [ "create_parent" : true, "required" : true, "direction" : "input", - "multiple" : true, + "multiple" : false, "multiple_sep" : ";" }, { @@ -3316,14 +3311,16 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/workflows/runner", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3332,7 +3329,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3343,11 +3340,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3378,19 +3377,13 @@ workflow run_wf { input_ch main: - output_ch = input_ch - // Multiple runs can be provided, and the reads for these runs will - // be concatenated. Here, we gather the FASTQ files from each input directory first. 
- | flatMap {id, state -> - // Create an input event per input directory - def new_state = state.input.withIndex().collect{input_dir, id_index -> - def state_item = state + ["input": input_dir, "index": id_index, "run_id": id] - return ["${id}_${id_index}".toString(), state_item] - } - return new_state - } + htrnaseq_ch = input_ch // List the FASTQ files per input directory // Be careful: an event per lane is created! + | map {id, state -> + def new_state = state + ["run_id": id] + return [id, new_state] + } | listInputDir.run( fromState: [ "input": "input", @@ -3408,13 +3401,11 @@ workflow run_wf { // there might be multiple FASTQs for a single sample that correspond to the // lanes. So the fastq files must be gathered across lanes and input folders // in order to create an input lists for R1 and R2. - | map {id, state -> [state.sample_id, state]} - | groupTuple(by: 0, sort: { state1, state2 -> - if (state1.index == state2.index) { - return state1.lane <=> state2.lane - } - return state1.index <=> state2.index - }) + // The ID of the event here is important! It determines the name of the output + // folders for the FASTQ files and these folders are published as-is later. + // The folder where the FASTQ files are stored in should be named after the run ID. + | map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]} + | groupTuple(by: 0, sort: "hash") | map {id, states -> def new_r1 = states.collect{it.r1_output} def new_r2 = states.collect{it.r2_output} @@ -3423,7 +3414,7 @@ workflow run_wf { // TODO: this can be asserted. 
def new_state = states[0] + [ "r1": new_r1, - "r2": new_r2 + "r2": new_r2, ] return [id, new_state] } @@ -3432,8 +3423,7 @@ workflow run_wf { f_data: 'fData/$id.txt', p_data: 'pData/$id.txt', star_output: 'star_output/$id/*', - fastq_output_r1: 'fastq/*_R1_001.fastq', - fastq_output_r2: 'fastq/*_R1_001.fastq', + fastq_output: 'fastq/*', eset: 'esets/$id.rds', nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt', star_qc_metrics: 'starLogs/$id.txt', @@ -3446,32 +3436,32 @@ workflow run_wf { genomeDir: "genomeDir", annotation: "annotation", umi_length: "umi_length", + sample_id: "sample_id", ], toState: { id, result, state -> state + result } ) + // The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate) // but for publishing the results, this is not handy because we want to use the $id // variable as a pointer to the target data. // // So, we should combine everything together // - // project_id / experiment_id / date_workflow - + // project_id / experiment_id / "data_processed" / date_workflow + grouped_ch = htrnaseq_ch | toSortedList - | map{ vs -> def all_fastqs [ vs[0][1].run_id, // The original ID [ star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()), - fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1), - fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1), nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }), star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }), eset: reduce_paths(vs.collect{ it[1].eset }), f_data: reduce_paths(vs.collect{ it[1].f_data }), p_data: reduce_paths(vs.collect{ it[1].p_data }), + fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(), html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools plain_output: vs.collect{ it[1].plain_output }[0], project_id: vs.collect{ it[1].project_id }[0], @@ -3480,12 +3470,13 @@ workflow run_wf { ] } + results_publish_ch = 
grouped_ch | publish_results.run( fromState: { id, state -> def project = (state.plain_output) ? id : "${state.project_id}" def experiment = (state.plain_output) ? id : "${state.experiment_id}" def id0 = "${project}/${experiment}" - def id1 = (state.plain_output) ? id : "${id0}/${date}" + def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}" def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}" if (id == id2) { @@ -3516,14 +3507,24 @@ workflow run_wf { ] ) + fastq_publish_ch = grouped_ch + | flatMap{id, state -> + def new_states = state.fastq_output.collect{fastq_dir -> + def new_id = fastq_dir.name // The folder name corresponds to the run + def fastq_files = fastq_dir.listFiles() + def new_state = [ + "fastq_output": fastq_files + ] + return [new_id, new_state] + } + return new_states + } | publish_fastqs.run( fromState: { id, state -> def id0 = "${id}" def id1 = (state.plain_output) ? id : "${id0}/${date}" def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}" - println(state.plain_output) - if (id == id2) { println("Publising fastqs to ${params.fastq_publish_dir}") } else { @@ -3531,8 +3532,7 @@ workflow run_wf { } [ - input_r1: state.fastq_output_r1, - input_r2: state.fastq_output_r2, + input: state.fastq_output, output: "${id2}", ] }, @@ -3547,7 +3547,7 @@ workflow run_wf { ) emit: - output_ch + grouped_ch | map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] } } diff --git a/target/nextflow/workflows/runner/nextflow_schema.json b/target/nextflow/workflows/runner/nextflow_schema.json index e88fb039..a8a0169c 100644 --- a/target/nextflow/workflows/runner/nextflow_schema.json +++ b/target/nextflow/workflows/runner/nextflow_schema.json @@ -17,8 +17,8 @@ "input": { "type": "string", - "description": "Type: List of `file`, required, multiple_sep: `\";\"`. 
Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`", - "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`" + "description": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`", + "help_text": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`" } diff --git a/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml b/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml index d58dda16..0e2ca977 100644 --- a/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml +++ b/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml @@ -213,21 +213,38 @@ build_info: engine: "native|native" output: "target/nextflow/workflows/well_demultiplex" executable: "target/nextflow/workflows/well_demultiplex/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" dependencies: - "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/cutadapt" - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/concat_text" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -239,11 +256,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/workflows/well_demultiplex/main.nf b/target/nextflow/workflows/well_demultiplex/main.nf index 1af7ff89..285540f6 100644 --- a/target/nextflow/workflows/well_demultiplex/main.nf +++ b/target/nextflow/workflows/well_demultiplex/main.nf @@ -1,6 +1,6 @@ // well_demultiplex update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -86,64 +86,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -155,10 +147,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3318,14 +3313,16 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/workflows/well_demultiplex", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. 
A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3334,7 +3331,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3345,11 +3342,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", @@ -3430,6 +3429,8 @@ workflow run_wf { output: new_output, error_rate: 0.10, demultiplex_mode: "single", + output_r1: state.output_r1, + output_r2: state.output_r2, ] }, toState: { id, result, state -> diff --git a/target/nextflow/workflows/well_demultiplex/nextflow_schema.json b/target/nextflow/workflows/well_demultiplex/nextflow_schema.json index 8be4ff28..9b7209ac 100644 --- a/target/nextflow/workflows/well_demultiplex/nextflow_schema.json +++ b/target/nextflow/workflows/well_demultiplex/nextflow_schema.json @@ -57,10 +57,10 @@ "output_r1": { "type": "string", - "description": "Type: List of `file`, required, default: `$id.$key.output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files", - "help_text": "Type: List of `file`, required, default: `$id.$key.output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" + "description": "Type: List of `file`, required, default: `fastq/*_R1_001.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files", + "help_text": "Type: List of `file`, required, default: `fastq/*_R1_001.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" , - "default":"$id.$key.output_r1_*.fastq" + "default":"fastq/*_R1_001.fastq" } @@ -68,10 +68,10 @@ "output_r2": { "type": "string", - "description": "Type: List of `file`, required, default: `$id.$key.output_r2_*.fastq`, multiple_sep: `\";\"`. 
List of demultiplexed fastq files", - "help_text": "Type: List of `file`, required, default: `$id.$key.output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" + "description": "Type: List of `file`, required, default: `fastq/*_R2_001.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files", + "help_text": "Type: List of `file`, required, default: `fastq/*_R2_001.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files" , - "default":"$id.$key.output_r2_*.fastq" + "default":"fastq/*_R2_001.fastq" } diff --git a/target/nextflow/workflows/well_metadata/.config.vsh.yaml b/target/nextflow/workflows/well_metadata/.config.vsh.yaml index 42b0ce0e..7b25df89 100644 --- a/target/nextflow/workflows/well_metadata/.config.vsh.yaml +++ b/target/nextflow/workflows/well_metadata/.config.vsh.yaml @@ -211,18 +211,35 @@ build_info: engine: "native|native" output: "target/nextflow/workflows/well_metadata" executable: "target/nextflow/workflows/well_metadata/main.nf" - viash_version: "0.9.2" - git_commit: "d157606b49b157cd2955acf9124f9043fbd0ca5a" + viash_version: "0.9.4" + git_commit: "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e" git_remote: "https://github.com/viash-hub/htrnaseq" + git_tag: "v0.7.2-6-gf1f0c98" package_config: name: "htrnaseq" version: "update-resources" - description: "High-throughput pipeline [WIP]\n" + summary: "A workflow for high-throughput RNA-seq data analyses.\n" + description: "This workflow is designed to process high-throughput RNA-seq data,\ + \ where every\nwell of a microarray plate is a sample. 
A fasta file provided as\ + \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\ + \ is built in a modular fashion, where most of the base functionality\nis provided\ + \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\ + supplemented by custom base components and workflow components in this package.\n\ + \nThe full workflow is split in two major subworkflows that can be run independently:\n\ + \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\ + \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\ + \ QC reports.\n\nEach of those can be started individually, or the full workflow\ + \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\ + \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\ + \ where a\nnumber of choices (input/output structure and location) have been made.\n\ + \nInput for the workflow has to be `fastq` files (zipped or not). 
For bcl or other\ + \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\ + \ first.\n" info: test_resources: - path: "gs://viash-hub-resources/htrnaseq/v1" dest: "resources_test" - viash_version: "0.9.2" + viash_version: "0.9.4" source: "src" target: "target" config_mods: @@ -234,11 +251,13 @@ package_config: - ".engines[.type == 'docker'].target_tag := 'update-resources'" keywords: - "bioinformatics" - - "sequence" + - "sequencing" - "high-throughput" + - "RNAseq" - "mapping" - "counting" - "pipeline" + - "workflow" license: "MIT" organization: "vsh" links: diff --git a/target/nextflow/workflows/well_metadata/main.nf b/target/nextflow/workflows/well_metadata/main.nf index c733e303..d99b2223 100644 --- a/target/nextflow/workflows/well_metadata/main.nf +++ b/target/nextflow/workflows/well_metadata/main.nf @@ -1,6 +1,6 @@ // well_metadata update-resources // -// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -85,64 +85,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -154,10 +146,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? 
null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -3298,14 +3293,16 @@ meta = [ "runner" : "nextflow", "engine" : "native|native", "output" : "target/nextflow/workflows/well_metadata", - "viash_version" : "0.9.2", - "git_commit" : "d157606b49b157cd2955acf9124f9043fbd0ca5a", - "git_remote" : "https://github.com/viash-hub/htrnaseq" + "viash_version" : "0.9.4", + "git_commit" : "f1f0c98bba1e84f0608978a576a49f2cc8a9aa3e", + "git_remote" : "https://github.com/viash-hub/htrnaseq", + "git_tag" : "v0.7.2-6-gf1f0c98" }, "package_config" : { "name" : "htrnaseq", "version" : "update-resources", - "description" : "High-throughput pipeline [WIP]\n", + "summary" : "A workflow for high-throughput RNA-seq data analyses.\n", + "description" : "This workflow is designed to process high-throughput RNA-seq data, where every\nwell of a microarray plate is a sample. A fasta file provided as input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow is built in a modular fashion, where most of the base functionality\nis provided by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\nsupplemented by custom base components and workflow components in this package.\n\nThe full workflow is split in two major subworkflows that can be run independently:\n\n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate QC reports.\n\nEach of those can be started individually, or the full workflow can be run in two ways:\n\n1. 
Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq) \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner) where a\nnumber of choices (input/output structure and location) have been made.\n\nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex) first.\n", "info" : { "test_resources" : [ { @@ -3314,7 +3311,7 @@ meta = [ } ] }, - "viash_version" : "0.9.2", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ @@ -3325,11 +3322,13 @@ meta = [ ], "keywords" : [ "bioinformatics", - "sequence", + "sequencing", "high-throughput", + "RNAseq", "mapping", "counting", - "pipeline" + "pipeline", + "workflow" ], "license" : "MIT", "organization" : "vsh", diff --git a/target/nextflow/workflows/well_metadata/nextflow_schema.json b/target/nextflow/workflows/well_metadata/nextflow_schema.json index f2f2f1ba..5418332f 100644 --- a/target/nextflow/workflows/well_metadata/nextflow_schema.json +++ b/target/nextflow/workflows/well_metadata/nextflow_schema.json @@ -67,10 +67,10 @@ "output_r1": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_r1.output_r1`. Output fastq file", - "help_text": "Type: `file`, required, default: `$id.$key.output_r1.output_r1`. Output fastq file." + "description": "Type: `file`, required, default: `$id.$key.output_r1`. Output fastq file", + "help_text": "Type: `file`, required, default: `$id.$key.output_r1`. Output fastq file." , - "default":"$id.$key.output_r1.output_r1" + "default":"$id.$key.output_r1" } @@ -78,10 +78,10 @@ "output_r2": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output_r2.output_r2`. 
Output fastq file", - "help_text": "Type: `file`, required, default: `$id.$key.output_r2.output_r2`. Output fastq file." + "description": "Type: `file`, required, default: `$id.$key.output_r2`. Output fastq file", + "help_text": "Type: `file`, required, default: `$id.$key.output_r2`. Output fastq file." , - "default":"$id.$key.output_r2.output_r2" + "default":"$id.$key.output_r2" } @@ -129,10 +129,10 @@ "well_star_mapping": { "type": "string", - "description": "Type: `file`, default: `$id.$key.well_star_mapping.well_star_mapping`. ", - "help_text": "Type: `file`, default: `$id.$key.well_star_mapping.well_star_mapping`. " + "description": "Type: `file`, default: `$id.$key.well_star_mapping`. ", + "help_text": "Type: `file`, default: `$id.$key.well_star_mapping`. " , - "default":"$id.$key.well_star_mapping.well_star_mapping" + "default":"$id.$key.well_star_mapping" }