diff --git a/CHANGELOG.md b/CHANGELOG.md index 1db44d7..ebe5b02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# toolbox unreleased +# toolbox v0.1.1 ## MINOR CHANGES @@ -6,6 +6,10 @@ * Bump viash to 0.9.0 (PR #10). +* Bump viash to 0.9.4 (PR #13). + +* Update README (PR #13). + # toolbox v0.1.0 ## NEW FEATURES diff --git a/README.md b/README.md index 9af5344..4ffb898 100644 --- a/README.md +++ b/README.md @@ -2,41 +2,123 @@ # 🛠📦 toolbox -[![ViashHub](https://img.shields.io/badge/ViashHub-toolbox-7a4baa.png)](https://web.viash-hub.com/packages/toolbox) -[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Ftoolbox-blue.png)](https://github.com/viash-hub/toolbox) +[![ViashHub](https://img.shields.io/badge/ViashHub-toolbox-7a4baa.svg)](https://www.viash-hub.com/packages/toolbox) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2Ftoolbox-blue.svg)](https://github.com/viash-hub/toolbox) [![GitHub -License](https://img.shields.io/github/license/viash-hub/toolbox.png)](https://github.com/viash-hub/toolbox/blob/main/LICENSE) +License](https://img.shields.io/github/license/viash-hub/toolbox.svg)](https://github.com/viash-hub/toolbox/blob/main/LICENSE) [![GitHub -Issues](https://img.shields.io/github/issues/viash-hub/toolbox.png)](https://github.com/viash-hub/toolbox/issues) +Issues](https://img.shields.io/github/issues/viash-hub/toolbox.svg)](https://github.com/viash-hub/toolbox/issues) [![Viash -version](https://img.shields.io/badge/Viash-v0.9.0--RC7-blue)](https://viash.io) +version](https://img.shields.io/badge/Viash-v0.9.4-blue.svg)](https://viash.io) -A collection of command-line tools. +A collection of curated command-line tools for general IT tasks, built +with Viash. -## Objectives +## Introduction -- **Reusability**: Facilitating the use of components across various - projects and contexts. -- **Reproducibility**: Ensuring that components are reproducible and can - be easily shared. 
-- **Best Practices**: Adhering to established standards in software - development and bioinformatics. +`toolbox` provides a versatile suite of IT components, following the +robust Viash (https://viash.io) framework. This package focuses on +delivering reliable, standalone tools that can be easily integrated into +larger computational workflows. + +The core philosophy emphasizes **reusability**, **reproducibility**, and +adherence to **best practices** in component creation. Key features of +`toolbox` components include: + +- **Standalone & Nextflow Ready:** Execute components directly from the + command line or seamlessly incorporate them into Nextflow workflows. +- **High Quality Standards:** + - Comprehensive documentation for each component and its parameters. + - Full exposure of the underlying tool’s arguments for maximum + flexibility. + - Containerized (Docker) to ensure consistent environments and manage + dependencies, leading to enhanced reproducibility. + - Unit tested to verify functionality and ensure reliability. + +## Example Usage + +Viash components in toolbox can be run in various ways: + +``` mermaid lang="mermaid" +flowchart TD + A[toolbox v0.1.0] --> B(Viash Hub Launch) + A --> C(Viash CLI) + A --> D(Nextflow CLI) + A --> E(Seqera Cloud) + A --> F(As a dependency) +``` + +### 1. Via the Viash Hub Launch interface + +You can run this component directly from the Viash Hub [Launch +interface](https://www.viash-hub.com/launch?package=toolbox&version=v0.1.0&component=yq&runner=Executable). + +![](docs/viash-hub.png) + +### 2. Via the Viash CLI + +You can run this component directly from the command line using the +Viash CLI. + +``` bash +viash run vsh://toolbox@v0.1.0/yq -- --help + +viash run vsh://toolbox@v0.1.0/yq -- \ + --input path/to/input.yaml --eval '.name = "foo"' \ + --output output.yaml +``` + +This will run the component with the specified input files and output +the results to the specified output file. + +### 3. 
Via the Nextflow CLI or Seqera Cloud + +You can run this component as a Nextflow pipeline. + +``` bash +nextflow run https://packages.viash-hub.com/vsh/toolbox \ + -revision v0.1.0 \ + -main-script target/nextflow/yq/main.nf \ + -latest -resume \ + -profile docker \ + --input path/to/input.yaml --eval '.name = "foo"' \ + --publish_dir path/to/output +``` + +**Note:** Make sure that the [Nextflow +SCM](https://www.nextflow.io/docs/latest/git.html#git-configuration) is +set up properly. You can do this by adding the following lines to your +`~/.nextflow/scm` file: + +``` groovy +providers.vsh.platform = 'gitlab' +providers.vsh.server = 'https://packages.viash-hub.com' +``` + +**Tip:** This will also work with Seqera Cloud or other +Nextflow-compatible platforms. + +### 4. As a dependency + +In your Viash config file (`config.vsh.yaml`), you can add this +component as a dependency: + +``` yaml +dependencies: + - name: yq + repository: vsh://toolbox@v0.1.0 +``` + +**Tip:** See the [Viash +documentation](https://viash.io/guide/nextflow_vdsl3/create-a-pipeline.html#pipeline-as-a-component) +for more details on how to use Viash components as a dependency in your +own Nextflow workflows. ## Contributing -We encourage contributions from the community. To contribute: - -1. **Fork the Repository**: Start by forking this repository to your - account. -2. **Develop Your Component**: Create your Viash component, ensuring it - aligns with our best practices (detailed below). -3. **Submit a Pull Request**: After testing your component, submit a - pull request for review. - -## Contribution Guidelines - -The contribution guidelines describes which steps you should follow to -contribute a component to this repository. +Contributions are welcome! We aim to build a comprehensive collection of +high-quality command-line components. If you’d like to contribute, +please follow these general steps: 1. Find a component to contribute 2. 
Add config template @@ -55,18 +137,3 @@ contribute a component to this repository. See the [CONTRIBUTING](https://github.com/viash-hub/toolbox/blob/main/CONTRIBUTING.md) file for more details. - -## Support and Community - -For support, questions, or to join our community: - -- **Issues**: Submit questions or issues via the [GitHub issue - tracker](https://github.com/viash-hub/toolbox/issues). -- **Discussions**: Join our discussions via [GitHub - Discussions](https://github.com/viash-hub/toolbox/discussions). - -## License - -This repository is licensed under an MIT license. See the -[LICENSE](https://github.com/viash-hub/toolbox/blob/main/LICENSE) file -for details. diff --git a/README.qmd b/README.qmd index 7a3bbaa..23e0f1a 100644 --- a/README.qmd +++ b/README.qmd @@ -2,37 +2,105 @@ format: gfm --- ```{r setup, include=FALSE} -project <- yaml::read_yaml("_viash.yaml") -license <- paste0(project$links$repository, "/blob/main/LICENSE") -contributing <- paste0(project$links$repository, "/blob/main/CONTRIBUTING.md") +package <- yaml::read_yaml("_viash.yaml") +license <- paste0(package$links$repository, "/blob/main/LICENSE") +contributing <- paste0(package$links$repository, "/blob/main/CONTRIBUTING.md") + +pkg <- package$name +ver <- if (!is.null(package$version)) package$version else "v0.1.0" +comp <- "yq" ``` -# 🛠📦 `r project$name` +# 🛠📦 `r pkg` -[![ViashHub](https://img.shields.io/badge/ViashHub-`r project$name`-7a4baa)](https://web.viash-hub.com/packages/`r project$name`) -[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r project$name`-blue)](`r project$links$repository`) -[![GitHub License](https://img.shields.io/github/license/viash-hub/`r project$name`)](`r license`) -[![GitHub Issues](https://img.shields.io/github/issues/viash-hub/`r project$name`)](`r project$links$issue_tracker`) -[![Viash version](https://img.shields.io/badge/Viash-v`r gsub("-", "--", project$viash_version)`-blue)](https://viash.io) 
+[![ViashHub](https://img.shields.io/badge/ViashHub-`r pkg`-7a4baa.svg)](https://www.viash-hub.com/packages/`r pkg`) +[![GitHub](https://img.shields.io/badge/GitHub-viash--hub%2F`r pkg`-blue.svg)](`r package$links$repository`) +[![GitHub License](https://img.shields.io/github/license/viash-hub/`r pkg`.svg)](`r license`) +[![GitHub Issues](https://img.shields.io/github/issues/viash-hub/`r pkg`.svg)](`r package$links$issue_tracker`) +[![Viash version](https://img.shields.io/badge/Viash-v`r gsub("-", "--", package$viash_version)`-blue.svg)](https://viash.io) -`r project$description` +`r package$summary` -## Objectives +## Introduction -- **Reusability**: Facilitating the use of components across various projects and contexts. -- **Reproducibility**: Ensuring that components are reproducible and can be easily shared. -- **Best Practices**: Adhering to established standards in software development and bioinformatics. +`r package$description` + +## Example Usage + +Viash components in `r pkg` can be run in various ways: + +```{r mmd, echo=FALSE, results='asis'} +cat( + "```mermaid\n", + "flowchart TD\n", + " A[", pkg, " ", ver, "] --> B(Viash Hub Launch)\n", + " A --> C(Viash CLI)\n", + " A --> D(Nextflow CLI)\n", + " A --> E(Seqera Cloud)\n", + " A --> F(As a dependency)\n", + "```\n", + sep = "" +) +``` + +### 1. Via the Viash Hub Launch interface + +You can run this component directly from the Viash Hub [Launch interface](https://www.viash-hub.com/launch?package=`r pkg`&version=`r ver`&component=`r comp`&runner=Executable). + +![](docs/viash-hub.png) + +### 2. Via the Viash CLI + +You can run this component directly from the command line using the Viash CLI. + +```bash +viash run vsh://`r pkg`@`r ver`/`r comp` -- --help + +viash run vsh://`r pkg`@`r ver`/`r comp` -- \ + --input path/to/input.yaml --eval '.name = "foo"' \ + --output output.yaml +``` + +This will run the component with the specified input files and output the results to the specified output file. + +### 3. 
Via the Nextflow CLI or Seqera Cloud + +You can run this component as a Nextflow pipeline. + +```bash +nextflow run https://packages.viash-hub.com/vsh/`r pkg` \ + -revision `r ver` \ + -main-script target/nextflow/`r comp`/main.nf \ + -latest -resume \ + -profile docker \ + --input path/to/input.yaml --eval '.name = "foo"' \ + --publish_dir path/to/output +``` + +**Note:** Make sure that the [Nextflow SCM](https://www.nextflow.io/docs/latest/git.html#git-configuration) is set up properly. You can do this by adding the following lines to your `~/.nextflow/scm` file: + +```groovy +providers.vsh.platform = 'gitlab' +providers.vsh.server = 'https://packages.viash-hub.com' +``` + +**Tip:** This will also work with Seqera Cloud or other Nextflow-compatible platforms. + +### 4. As a dependency + +In your Viash config file (`config.vsh.yaml`), you can add this component as a dependency: + +```yaml +dependencies: + - name: `r comp` + repository: vsh://`r pkg`@`r ver` +``` + +**Tip:** See the [Viash documentation](https://viash.io/guide/nextflow_vdsl3/create-a-pipeline.html#pipeline-as-a-component) for more details on how to use Viash components as a dependency in your own Nextflow workflows. ## Contributing -We encourage contributions from the community. To contribute: +Contributions are welcome! We aim to build a comprehensive collection of high-quality command-line components. If you'd like to contribute, please follow these general steps: -1. **Fork the Repository**: Start by forking this repository to your account. -2. **Develop Your Component**: Create your Viash component, ensuring it aligns with our best practices (detailed below). -3. **Submit a Pull Request**: After testing your component, submit a pull request for review. - -## Contribution Guidelines - -The contribution guidelines describes which steps you should follow to contribute a component to this repository. 
```{r echo=FALSE} lines <- readr::read_lines("CONTRIBUTING.md") @@ -49,14 +117,3 @@ knitr::asis_output( ``` See the [CONTRIBUTING](`r contributing`) file for more details. - - -## Support and Community - -For support, questions, or to join our community: - -- **Issues**: Submit questions or issues via the [GitHub issue tracker](`r project$links$issue_tracker`). -- **Discussions**: Join our discussions via [GitHub Discussions](`r project$links$repository`/discussions). - -## License -This repository is licensed under an MIT license. See the [LICENSE](`r license`) file for details. diff --git a/_viash.yaml b/_viash.yaml index 2eae174..8c9bb98 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -1,13 +1,26 @@ name: toolbox +version: v0.1.0 +summary: | + A collection of curated command-line tools for general IT tasks, built with Viash. description: | - A collection of command-line tools. + `toolbox` provides a versatile suite of IT components, following the robust Viash (https://viash.io) framework. + This package focuses on delivering reliable, standalone tools that can be easily integrated into larger computational workflows. + + The core philosophy emphasizes **reusability**, **reproducibility**, and adherence to **best practices** in component creation. Key features of `toolbox` components include: + + * **Standalone & Nextflow Ready:** Execute components directly from the command line or seamlessly incorporate them into Nextflow workflows. + * **High Quality Standards:** + * Comprehensive documentation for each component and its parameters. + * Full exposure of the underlying tool's arguments for maximum flexibility. + * Containerized (Docker) to ensure consistent environments and manage dependencies, leading to enhanced reproducibility. + * Unit tested to verify functionality and ensure reliability. 
license: MIT keywords: [toolbox, command-line, tools] links: issue_tracker: https://github.com/viash-hub/toolbox/issues repository: https://github.com/viash-hub/toolbox -viash_version: 0.9.0 +viash_version: 0.9.4 config_mods: | .requirements.commands := ['ps'] diff --git a/docs/viash-hub.png b/docs/viash-hub.png new file mode 100644 index 0000000..0151522 Binary files /dev/null and b/docs/viash-hub.png differ diff --git a/nextflow.config b/nextflow.config index 5a895df..998e828 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,6 +1,6 @@ manifest { name = "toolbox" - version = "main" + version = "v0.1.0" defaultBranch = "main" nextflowVersion = "!>=20.12.1-edge" } diff --git a/target/executable/bgzip/.config.vsh.yaml b/target/executable/bgzip/.config.vsh.yaml index b272a8e..166770a 100644 --- a/target/executable/bgzip/.config.vsh.yaml +++ b/target/executable/bgzip/.config.vsh.yaml @@ -1,5 +1,5 @@ name: "bgzip" -version: "main" +version: "v0.1.0" argument_groups: - name: "Inputs" arguments: @@ -126,6 +126,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -207,7 +210,7 @@ engines: id: "docker" image: "quay.io/biocontainers/htslib:1.19--h81da01d_0" target_registry: "images.viash-hub.com" - target_tag: "main" + target_tag: "v0.1.0" namespace_separator: "/" setup: - type: "docker" @@ -224,22 +227,35 @@ build_info: engine: "docker|native" output: "target/executable/bgzip" executable: "target/executable/bgzip/bgzip" - viash_version: "0.9.0" - git_commit: "a3cebc0315249e08a4a39d175670f29a800c1c5f" + viash_version: "0.9.4" + git_commit: "d33eb370683ba1f1132ec7d7b6884c8040dc901a" git_remote: "https://github.com/viash-hub/toolbox" package_config: name: "toolbox" - version: "main" - description: "A collection of command-line tools.\n" + version: "v0.1.0" + summary: "A collection of curated command-line tools for general IT tasks, built\ + \ with Viash.\n" + description: 
"`toolbox` provides a versatile suite of IT components, following the\ + \ robust Viash (https://viash.io) framework.\nThis package focuses on delivering\ + \ reliable, standalone tools that can be easily integrated into larger computational\ + \ workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**,\ + \ and adherence to **best practices** in component creation. Key features of `toolbox`\ + \ components include:\n\n* **Standalone & Nextflow Ready:** Execute components\ + \ directly from the command line or seamlessly incorporate them into Nextflow\ + \ workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for each component and its parameters.\n * Full exposure of the underlying\ + \ tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure\ + \ consistent environments and manage dependencies, leading to enhanced reproducibility.\n\ + \ * Unit tested to verify functionality and ensure reliability.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" keywords: - "toolbox" - "command-line" diff --git a/target/executable/bgzip/bgzip b/target/executable/bgzip/bgzip index f70fde6..72e59fc 100755 --- a/target/executable/bgzip/bgzip +++ b/target/executable/bgzip/bgzip @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# bgzip main +# bgzip v0.1.0 # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,67 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bgzip main" - echo "" - echo "Block compression/decompression utility" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " file to be compressed or decompressed" - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " compressed or decompressed output" - echo "" - echo " -I, --index_name" - echo " type: file, output, file must exist" - echo " name of BGZF index file [file.gz.gzi]" - echo "" - echo "Arguments:" - echo " -b, --offset" - echo " type: integer" - echo " decompress at virtual file pointer (0-based uncompressed offset)" - echo "" - echo " -d, --decompress" - echo " type: boolean_true" - echo " decompress the input file" - echo "" - echo " -g, --rebgzip" - echo " type: boolean_true" - echo " use an index file to bgzip a file" - echo "" - echo " -i, --index" - echo " type: boolean_true" - echo " compress and create BGZF index" - echo "" - echo " -l, --compress_level" - echo " type: integer" - echo " min: -1" - echo " max: 9" - echo " compression level to use when compressing; 0 to 9, or -1 for default" - echo " [-1]" - echo "" - echo " -r, --reindex" - echo " type: boolean_true" - echo " (re)index the output file" - echo "" - echo " -s, --size" - echo " type: integer" - echo " min: 0" - echo " decompress INT bytes (uncompressed size)" - echo "" - echo " -t, --test" - echo " type: boolean_true" - echo " test integrity of compressed file" - echo "" - echo " --binary" - echo " type: boolean_true" - echo " Don't align blocks with text lines" -} # initialise variables VIASH_MODE='run' @@ -509,10 +448,10 @@ ENTRYPOINT [] RUN bgzip -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/bgzip: "\1"/' > /var/software_versions.txt LABEL 
org.opencontainers.image.description="Companion container for running component bgzip" -LABEL org.opencontainers.image.created="2024-09-13T08:50:00Z" +LABEL org.opencontainers.image.created="2025-05-06T20:10:22Z" LABEL org.opencontainers.image.source="https://github.com/samtools/htslib" -LABEL org.opencontainers.image.revision="a3cebc0315249e08a4a39d175670f29a800c1c5f" -LABEL org.opencontainers.image.version="main" +LABEL org.opencontainers.image.revision="d33eb370683ba1f1132ec7d7b6884c8040dc901a" +LABEL org.opencontainers.image.version="v0.1.0" VIASHDOCKER fi @@ -626,6 +565,93 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bgzip v0.1.0" + echo "" + echo "Block compression/decompression utility" + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " file to be compressed or decompressed" + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " compressed or decompressed output" + echo "" + echo " -I, --index_name" + echo " type: file, output, file must exist" + echo " name of BGZF index file [file.gz.gzi]" + echo "" + echo "Arguments:" + echo " -b, --offset" + echo " type: integer" + echo " decompress at virtual file pointer (0-based uncompressed offset)" + echo "" + echo " -d, --decompress" + echo " type: boolean_true" + echo " decompress the input file" + echo "" + echo " -g, --rebgzip" + echo " type: boolean_true" + echo " use an index file to bgzip a file" + echo "" + echo " -i, --index" + echo " type: boolean_true" + echo " compress and create BGZF index" + echo "" + echo " -l, --compress_level" + echo " type: integer" + echo " min: -1" + echo " max: 9" + echo " compression level to use when compressing; 0 to 9, or -1 for default" + echo " [-1]" + echo "" + echo " -r, --reindex" + echo " type: boolean_true" + echo " (re)index 
the output file" + echo "" + echo " -s, --size" + echo " type: integer" + echo " min: 0" + echo " decompress INT bytes (uncompressed size)" + echo "" + echo " -t, --test" + echo " type: boolean_true" + echo " test integrity of compressed file" + echo "" + echo " --binary" + echo " type: boolean_true" + echo " Don't align blocks with text lines" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' @@ -648,7 +674,7 @@ while [[ $# -gt 0 ]]; do shift 1 ;; --version) - echo "bgzip main" + echo "bgzip v0.1.0" exit ;; --input) @@ -884,7 +910,7 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # determine docker image id if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/toolbox/bgzip:main' + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/toolbox/bgzip:v0.1.0' fi # print dockerfile diff --git a/target/executable/yq/.config.vsh.yaml b/target/executable/yq/.config.vsh.yaml index b6bce7b..b99f52e 100644 --- a/target/executable/yq/.config.vsh.yaml +++ b/target/executable/yq/.config.vsh.yaml @@ -1,5 +1,5 @@ name: "yq" -version: "main" +version: "v0.1.0" argument_groups: - name: "Inputs" arguments: @@ -149,6 +149,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,7 +237,7 @@ engines: id: "docker" image: "alpine:latest" target_registry: "images.viash-hub.com" - target_tag: "main" + target_tag: "v0.1.0" namespace_separator: "/" setup: - type: "apk" @@ -254,22 +257,35 @@ build_info: engine: "docker|native" output: "target/executable/yq" executable: "target/executable/yq/yq" - viash_version: "0.9.0" - git_commit: "a3cebc0315249e08a4a39d175670f29a800c1c5f" + viash_version: "0.9.4" + git_commit: "d33eb370683ba1f1132ec7d7b6884c8040dc901a" git_remote: "https://github.com/viash-hub/toolbox" package_config: name: "toolbox" - version: "main" - description: "A collection of command-line tools.\n" + version: "v0.1.0" + summary: "A collection of curated command-line tools for general IT tasks, built\ + \ with Viash.\n" + description: "`toolbox` provides a versatile suite of IT components, following the\ + \ robust Viash (https://viash.io) framework.\nThis package focuses on delivering\ + \ reliable, standalone tools that can be easily integrated into larger 
computational\ + \ workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**,\ + \ and adherence to **best practices** in component creation. Key features of `toolbox`\ + \ components include:\n\n* **Standalone & Nextflow Ready:** Execute components\ + \ directly from the command line or seamlessly incorporate them into Nextflow\ + \ workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for each component and its parameters.\n * Full exposure of the underlying\ + \ tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure\ + \ consistent environments and manage dependencies, leading to enhanced reproducibility.\n\ + \ * Unit tested to verify functionality and ensure reliability.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" keywords: - "toolbox" - "command-line" diff --git a/target/executable/yq/yq b/target/executable/yq/yq index 220112b..d4927d9 100755 --- a/target/executable/yq/yq +++ b/target/executable/yq/yq @@ -1,8 +1,8 @@ #!/usr/bin/env bash -# yq main +# yq v0.1.0 # -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # Intuitive. 
# @@ -169,50 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "yq main" - echo "" - echo "A portable YAML, JSON, XML, CSV, TOML and properties processor" - echo "" - echo "Inputs:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example: input.yaml" - echo " files to be processed" - echo "" - echo "Outputs:" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example: output.yaml" - echo " output file" - echo "" - echo "Arguments:" - echo " --eval" - echo " type: string, required parameter" - echo " example: .name = \"foo\"" - echo " expression to evaluate" - echo "" - echo " -I, --indent" - echo " type: integer" - echo " sets indent level for output (default 2)" - echo "" - echo " -p, --input_format" - echo " type: string" - echo " choices: [ auto, a, yaml, y, json, j, props, p, csv, c, tsv, t, xml, x," - echo "base64, uri, toml, shell, s, lua, l ]" - echo " parse format for input. (default \"auto\")" - echo "" - echo " -o, --output_format" - echo " type: string" - echo " choices: [ auto, a, yaml, y, json, j, props, p, csv, c, tsv, t, xml, x," - echo "base64, uri, toml, shell, s, lua, l ]" - echo " output format type. (default \"auto\")" - echo "" - echo " -P, --pretty_print" - echo " type: boolean_true" - echo " pretty print, shorthand for '... 
style = \"\"'" -} # initialise variables VIASH_MODE='run' @@ -494,10 +450,10 @@ RUN apk add --no-cache bash yq-go RUN /usr/bin/yq --version | sed 's/.*version\sv\(.*\)/yq: "\1"/' > /var/software_versions.txt LABEL org.opencontainers.image.description="Companion container for running component yq" -LABEL org.opencontainers.image.created="2024-09-13T08:49:59Z" +LABEL org.opencontainers.image.created="2025-05-06T20:10:22Z" LABEL org.opencontainers.image.source="https://github.com/mikefarah/yq" -LABEL org.opencontainers.image.revision="a3cebc0315249e08a4a39d175670f29a800c1c5f" -LABEL org.opencontainers.image.version="main" +LABEL org.opencontainers.image.revision="d33eb370683ba1f1132ec7d7b6884c8040dc901a" +LABEL org.opencontainers.image.version="v0.1.0" VIASHDOCKER fi @@ -611,6 +567,76 @@ fi # initialise docker variables VIASH_DOCKER_RUN_ARGS=(-i --rm) + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "yq v0.1.0" + echo "" + echo "A portable YAML, JSON, XML, CSV, TOML and properties processor" + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: input.yaml" + echo " files to be processed" + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.yaml" + echo " output file" + echo "" + echo "Arguments:" + echo " --eval" + echo " type: string, required parameter" + echo " example: .name = \"foo\"" + echo " expression to evaluate" + echo "" + echo " -I, --indent" + echo " type: integer" + echo " sets indent level for output (default 2)" + echo "" + echo " -p, --input_format" + echo " type: string" + echo " choices: [ auto, a, yaml, y, json, j, props, p, csv, c, tsv, t, xml, x," + echo "base64, uri, toml, shell, s, lua, l ]" + echo " parse format for input. 
(default \"auto\")" + echo "" + echo " -o, --output_format" + echo " type: string" + echo " choices: [ auto, a, yaml, y, json, j, props, p, csv, c, tsv, t, xml, x," + echo "base64, uri, toml, shell, s, lua, l ]" + echo " output format type. (default \"auto\")" + echo "" + echo " -P, --pretty_print" + echo " type: boolean_true" + echo " pretty print, shorthand for '... style = \"\"'" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + # initialise array VIASH_POSITIONAL_ARGS='' @@ -633,7 +659,7 @@ while [[ $# -gt 0 ]]; do shift 1 ;; --version) - echo "yq main" + echo "yq v0.1.0" exit ;; --input) @@ -818,7 +844,7 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # determine docker image id if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/toolbox/yq:main' + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/toolbox/yq:v0.1.0' fi # print dockerfile diff --git a/target/nextflow/bgzip/.config.vsh.yaml b/target/nextflow/bgzip/.config.vsh.yaml index ca4051f..5c2aa39 100644 --- a/target/nextflow/bgzip/.config.vsh.yaml +++ b/target/nextflow/bgzip/.config.vsh.yaml @@ -1,5 +1,5 @@ name: "bgzip" -version: "main" +version: "v0.1.0" argument_groups: - name: "Inputs" arguments: @@ -126,6 +126,9 @@ test_resources: path: "test_data" info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -207,7 +210,7 @@ engines: id: "docker" image: "quay.io/biocontainers/htslib:1.19--h81da01d_0" target_registry: "images.viash-hub.com" - target_tag: "main" + target_tag: "v0.1.0" namespace_separator: "/" setup: - type: "docker" @@ -224,22 +227,35 @@ build_info: engine: "docker|native" output: "target/nextflow/bgzip" executable: "target/nextflow/bgzip/main.nf" - viash_version: "0.9.0" - git_commit: "a3cebc0315249e08a4a39d175670f29a800c1c5f" + viash_version: "0.9.4" + git_commit: "d33eb370683ba1f1132ec7d7b6884c8040dc901a" git_remote: "https://github.com/viash-hub/toolbox" package_config: name: "toolbox" - version: "main" - description: "A collection of command-line tools.\n" + version: "v0.1.0" + summary: "A collection of curated command-line tools for general IT tasks, built\ + \ with Viash.\n" + description: "`toolbox` provides a versatile suite of IT components, following the\ + \ robust Viash (https://viash.io) framework.\nThis package focuses on delivering\ + \ reliable, standalone tools that can be 
easily integrated into larger computational\ + \ workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**,\ + \ and adherence to **best practices** in component creation. Key features of `toolbox`\ + \ components include:\n\n* **Standalone & Nextflow Ready:** Execute components\ + \ directly from the command line or seamlessly incorporate them into Nextflow\ + \ workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for each component and its parameters.\n * Full exposure of the underlying\ + \ tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure\ + \ consistent environments and manage dependencies, leading to enhanced reproducibility.\n\ + \ * Unit tested to verify functionality and ensure reliability.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" keywords: - "toolbox" - "command-line" diff --git a/target/nextflow/bgzip/main.nf b/target/nextflow/bgzip/main.nf index 3728bd1..d262131 100644 --- a/target/nextflow/bgzip/main.nf +++ b/target/nextflow/bgzip/main.nf @@ -1,6 +1,6 @@ -// bgzip main +// bgzip v0.1.0 // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. 
only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? 
null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? 
null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 
'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2805,7 +3031,7 @@ meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ "name" : "bgzip", - "version" : "main", + "version" : "v0.1.0", "argument_groups" : [ { "name" : "Inputs", @@ -2969,6 +3195,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3065,7 +3295,7 @@ meta = [ "id" : "docker", "image" : "quay.io/biocontainers/htslib:1.19--h81da01d_0", "target_registry" : "images.viash-hub.com", - "target_tag" : "main", + "target_tag" : "v0.1.0", "namespace_separator" : "/", "setup" : [ { @@ -3086,22 +3316,23 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/bgzip", - "viash_version" : "0.9.0", - "git_commit" : "a3cebc0315249e08a4a39d175670f29a800c1c5f", + "viash_version" : "0.9.4", + "git_commit" : "d33eb370683ba1f1132ec7d7b6884c8040dc901a", "git_remote" : "https://github.com/viash-hub/toolbox" }, "package_config" : { "name" : "toolbox", - "version" : "main", - "description" : "A collection of command-line tools.\n", - "viash_version" : "0.9.0", + "version" : "v0.1.0", + "summary" : "A collection of curated command-line tools for general IT tasks, built with Viash.\n", + "description" : "`toolbox` provides a versatile suite of IT components, following the robust Viash (https://viash.io) framework.\nThis package focuses on delivering reliable, standalone tools that can be 
easily integrated into larger computational workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**, and adherence to **best practices** in component creation. Key features of `toolbox` components include:\n\n* **Standalone & Nextflow Ready:** Execute components directly from the command line or seamlessly incorporate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for each component and its parameters.\n * Full exposure of the underlying tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure consistent environments and manage dependencies, leading to enhanced reproducibility.\n * Unit tested to verify functionality and ensure reliability.\n", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", - ".engines[.type == 'docker'].target_tag := 'main'" + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" ], "keywords" : [ "toolbox", @@ -3516,7 +3747,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3530,6 +3761,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = 
Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names @@ -3543,7 +3795,7 @@ meta["defaults"] = [ "container" : { "registry" : "images.viash-hub.com", "image" : "vsh/toolbox/bgzip", - "tag" : "main" + "tag" : "v0.1.0" }, "tag" : "$id" }'''), diff --git a/target/nextflow/bgzip/nextflow.config b/target/nextflow/bgzip/nextflow.config index 29c9435..bca0d14 100644 --- a/target/nextflow/bgzip/nextflow.config +++ b/target/nextflow/bgzip/nextflow.config @@ -2,7 +2,7 @@ manifest { name = 'bgzip' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' - version = 'main' + version = 'v0.1.0' description = 'Block compression/decompression utility' } diff --git a/target/nextflow/bgzip/nextflow_schema.json b/target/nextflow/bgzip/nextflow_schema.json index 43475ae..2b03957 100644 --- a/target/nextflow/bgzip/nextflow_schema.json +++ b/target/nextflow/bgzip/nextflow_schema.json @@ -37,10 +37,10 @@ "output": { "type": "string", - "description": "Type: `file`, required, default: `$id.$key.output.output`. compressed or decompressed output", - "help_text": "Type: `file`, required, default: `$id.$key.output.output`. 
compressed or decompressed output" + "description": "Type: `file`, required, default: `$id.$key.output`. compressed or decompressed output", + "help_text": "Type: `file`, required, default: `$id.$key.output`. compressed or decompressed output" , - "default": "$id.$key.output.output" + "default":"$id.$key.output" } @@ -48,10 +48,10 @@ "index_name": { "type": "string", - "description": "Type: `file`, default: `$id.$key.index_name.index_name`. name of BGZF index file [file", - "help_text": "Type: `file`, default: `$id.$key.index_name.index_name`. name of BGZF index file [file.gz.gzi]" + "description": "Type: `file`, default: `$id.$key.index_name`. name of BGZF index file [file", + "help_text": "Type: `file`, default: `$id.$key.index_name`. name of BGZF index file [file.gz.gzi]" , - "default": "$id.$key.index_name.index_name" + "default":"$id.$key.index_name" } @@ -82,7 +82,7 @@ "description": "Type: `boolean_true`, default: `false`. decompress the input file", "help_text": "Type: `boolean_true`, default: `false`. decompress the input file" , - "default": "False" + "default":false } @@ -93,7 +93,7 @@ "description": "Type: `boolean_true`, default: `false`. use an index file to bgzip a file", "help_text": "Type: `boolean_true`, default: `false`. use an index file to bgzip a file" , - "default": "False" + "default":false } @@ -104,7 +104,7 @@ "description": "Type: `boolean_true`, default: `false`. compress and create BGZF index", "help_text": "Type: `boolean_true`, default: `false`. compress and create BGZF index" , - "default": "False" + "default":false } @@ -125,7 +125,7 @@ "description": "Type: `boolean_true`, default: `false`. (re)index the output file", "help_text": "Type: `boolean_true`, default: `false`. (re)index the output file" , - "default": "False" + "default":false } @@ -146,7 +146,7 @@ "description": "Type: `boolean_true`, default: `false`. test integrity of compressed file", "help_text": "Type: `boolean_true`, default: `false`. 
test integrity of compressed file" , - "default": "False" + "default":false } @@ -157,7 +157,7 @@ "description": "Type: `boolean_true`, default: `false`. Don\u0027t align blocks with text lines", "help_text": "Type: `boolean_true`, default: `false`. Don\u0027t align blocks with text lines" , - "default": "False" + "default":false } diff --git a/target/nextflow/yq/.config.vsh.yaml b/target/nextflow/yq/.config.vsh.yaml index c46238b..560dab0 100644 --- a/target/nextflow/yq/.config.vsh.yaml +++ b/target/nextflow/yq/.config.vsh.yaml @@ -1,5 +1,5 @@ name: "yq" -version: "main" +version: "v0.1.0" argument_groups: - name: "Inputs" arguments: @@ -149,6 +149,9 @@ test_resources: is_executable: true info: null status: "enabled" +scope: + image: "public" + target: "public" requirements: commands: - "ps" @@ -234,7 +237,7 @@ engines: id: "docker" image: "alpine:latest" target_registry: "images.viash-hub.com" - target_tag: "main" + target_tag: "v0.1.0" namespace_separator: "/" setup: - type: "apk" @@ -254,22 +257,35 @@ build_info: engine: "docker|native" output: "target/nextflow/yq" executable: "target/nextflow/yq/main.nf" - viash_version: "0.9.0" - git_commit: "a3cebc0315249e08a4a39d175670f29a800c1c5f" + viash_version: "0.9.4" + git_commit: "d33eb370683ba1f1132ec7d7b6884c8040dc901a" git_remote: "https://github.com/viash-hub/toolbox" package_config: name: "toolbox" - version: "main" - description: "A collection of command-line tools.\n" + version: "v0.1.0" + summary: "A collection of curated command-line tools for general IT tasks, built\ + \ with Viash.\n" + description: "`toolbox` provides a versatile suite of IT components, following the\ + \ robust Viash (https://viash.io) framework.\nThis package focuses on delivering\ + \ reliable, standalone tools that can be easily integrated into larger computational\ + \ workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**,\ + \ and adherence to **best practices** in component creation. 
Key features of `toolbox`\ + \ components include:\n\n* **Standalone & Nextflow Ready:** Execute components\ + \ directly from the command line or seamlessly incorporate them into Nextflow\ + \ workflows.\n* **High Quality Standards:**\n * Comprehensive documentation\ + \ for each component and its parameters.\n * Full exposure of the underlying\ + \ tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure\ + \ consistent environments and manage dependencies, leading to enhanced reproducibility.\n\ + \ * Unit tested to verify functionality and ensure reliability.\n" info: null - viash_version: "0.9.0" + viash_version: "0.9.4" source: "src" target: "target" config_mods: - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" + - ".engines[.type == 'docker'].target_tag := 'v0.1.0'" keywords: - "toolbox" - "command-line" diff --git a/target/nextflow/yq/main.nf b/target/nextflow/yq/main.nf index 7b27fbe..ebcdd96 100644 --- a/target/nextflow/yq/main.nf +++ b/target/nextflow/yq/main.nf @@ -1,6 +1,6 @@ -// yq main +// yq v0.1.0 // -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // Intuitive. // @@ -82,64 +82,56 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi foundClass = "List[${e.foundClass}]" } } else if (par.type == "string") { - // cast to string if need be + // cast to string if need be. only cast if the value is a GString if (value instanceof GString) { - value = value.toString() + value = value as String } expectedClass = value instanceof String ? 
null : "String" } else if (par.type == "integer") { // cast to integer if need be - if (value instanceof String) { + if (value !instanceof Integer) { try { - value = value.toInteger() + value = value as Integer } catch (NumberFormatException e) { - // do nothing + expectedClass = "Integer" } } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" } else if (par.type == "long") { // cast to long if need be - if (value instanceof String) { + if (value !instanceof Long) { try { - value = value.toLong() + value = value as Long } catch (NumberFormatException e) { - // do nothing + expectedClass = "Long" } } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" } else if (par.type == "double") { // cast to double if need be - if (value instanceof String) { + if (value !instanceof Double) { try { - value = value.toDouble() + value = value as Double } catch (NumberFormatException e) { - // do nothing + expectedClass = "Double" } } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" } } - expectedClass = value instanceof Boolean ? 
null : "Boolean" } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { // cast to path if need be if (value instanceof String) { @@ -151,10 +143,13 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi expectedClass = value instanceof Path ? null : "Path" } else if (par.type == "file" && stage == "input" && par.direction == "output") { // cast to string if need be - if (value instanceof GString) { - value = value.toString() + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } } - expectedClass = value instanceof String ? null : "String" } else { // didn't find a match for par.type expectedClass = par.type @@ -173,7 +168,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi Map _processInputValues(Map inputs, Map config, String id, String key) { if (!workflow.stubRun) { config.allArguments.each { arg -> - if (arg.required) { + if (arg.required && arg.direction == "input") { assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" } @@ -192,15 +187,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) { } // helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - outputs = outputs.collectEntries { name, value -> def par = config.allArguments.find { it.plainName == name && 
it.direction == "output" } assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" @@ -213,6 +201,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) { return outputs } +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} // helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' class IDChecker { final def items = [] as Set @@ -1666,6 +1664,162 @@ def joinStates(Closure apply_) { } return joinStatesWf } +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + // helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' def collectFiles(obj) { if (obj instanceof java.io.File || obj instanceof Path) { @@ -1723,8 +1877,6 @@ def publishStates(Map args) { // the input files and the target output filenames def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." 
+ key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] def yamlFilename = yamlTemplate_ .replaceAll('\\$id', id_) @@ -1737,7 +1889,7 @@ def publishStates(Map args) { // convert state to yaml blob def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -1749,33 +1901,17 @@ process publishStatesProc { publishDir path: "${getPublishDir()}/", mode: "copy" tag "$id" input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + tuple val(id), val(yamlBlob), val(yamlFile) output: - tuple val(id), path{[yamlFile] + outputFiles} + tuple val(id), path{[yamlFile]} script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ } @@ -1806,13 +1942,10 @@ def publishStatesByConfig(Map args) { .replaceAll('\\$\\{key\\}', key_) def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // the processed state is a list of [key, value] tuples, where // - key 
is a String // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) def processedState = config.allArguments .findAll { it.direction == "output" } @@ -1829,7 +1962,7 @@ def publishStatesByConfig(Map args) { // in the state as-is, but is not something that needs // to be copied from the source path to the dest path if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + return [[key: plainName_, value: value]] } // if the orig state does not contain this filename, // it's an optional argument for which the user specified @@ -1860,13 +1993,9 @@ def publishStatesByConfig(Map args) { if (yamlDir != null) { value_ = yamlDir.relativize(value_) } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] + return value_ } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] + return [["key": plainName_, "value": outputPerFile]] } else { def value_ = java.nio.file.Paths.get(filename) // if id contains a slash @@ -1874,18 +2003,17 @@ def publishStatesByConfig(Map args) { value_ = yamlDir.relativize(value_) } def inputPath = value instanceof File ? 
value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + return [["key": plainName_, value: value_]] } } + def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} // convert state to yaml blob def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + [id_, yamlBlob_, yamlFilename] } | publishStatesProc emit: input_ch @@ -2559,7 +2687,8 @@ def _debug(workflowArgs, debugKey) { def workflowFactory(Map args, Map defaultWfArgs, Map meta) { def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) def key_ = workflowArgs["key"] - + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + workflow workflowInstance { take: input_ @@ -2716,12 +2845,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults + def chInitialOutputMulti = chArgsWithDefaults | _debug(workflowArgs, "processed") // run workflow | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] // see if output map contains metadata def meta_ = @@ -2734,19 +2887,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { output_ = output_.findAll{k, v -> k != "_meta"} // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] + [join_id, channelId, id_, output_] } // | view{"chInitialOutput: ${it.take(3)}"} + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + def chNewState = safeJoin(chJoined, chRunFiltered, key_) // input tuple format: [join_id, id, output, prev_state, ...] // output tuple format: [join_id, id, new_state, ...] | map{ tup -> @@ -2755,23 +2983,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) { } if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState + def chPublishStates = chNewState // input tuple format: [join_id, id, new_state, ...] 
// output tuple format: [join_id, id, new_state] | map{ tup -> tup.take(3) } - safeJoin(chPublish, chArgsWithDefaults, key_) + safeJoin(chPublishStates, chArgsWithDefaults, key_) // input tuple format: [join_id, id, new_state, orig_state, ...] // output tuple format: [id, new_state, orig_state] | map { tup -> tup.drop(1).take(3) - } + } | publishStatesByConfig(key: key_, config: meta.config) } - - // remove join_id and meta chReturn = chNewState | map { tup -> // input tuple format: [join_id, id, new_state, ...] @@ -2805,7 +3031,7 @@ meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ "name" : "yq", - "version" : "main", + "version" : "v0.1.0", "argument_groups" : [ { "name" : "Inputs", @@ -2972,6 +3198,10 @@ meta = [ } ], "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, "requirements" : { "commands" : [ "ps" @@ -3071,7 +3301,7 @@ meta = [ "id" : "docker", "image" : "alpine:latest", "target_registry" : "images.viash-hub.com", - "target_tag" : "main", + "target_tag" : "v0.1.0", "namespace_separator" : "/", "setup" : [ { @@ -3099,22 +3329,23 @@ meta = [ "runner" : "nextflow", "engine" : "docker|native", "output" : "target/nextflow/yq", - "viash_version" : "0.9.0", - "git_commit" : "a3cebc0315249e08a4a39d175670f29a800c1c5f", + "viash_version" : "0.9.4", + "git_commit" : "d33eb370683ba1f1132ec7d7b6884c8040dc901a", "git_remote" : "https://github.com/viash-hub/toolbox" }, "package_config" : { "name" : "toolbox", - "version" : "main", - "description" : "A collection of command-line tools.\n", - "viash_version" : "0.9.0", + "version" : "v0.1.0", + "summary" : "A collection of curated command-line tools for general IT tasks, built with Viash.\n", + "description" : "`toolbox` provides a versatile suite of IT components, following the robust Viash (https://viash.io) framework.\nThis package focuses on delivering reliable, standalone tools that can be easily integrated into larger 
computational workflows.\n\nThe core philosophy emphasizes **reusability**, **reproducibility**, and adherence to **best practices** in component creation. Key features of `toolbox` components include:\n\n* **Standalone & Nextflow Ready:** Execute components directly from the command line or seamlessly incorporate them into Nextflow workflows.\n* **High Quality Standards:**\n * Comprehensive documentation for each component and its parameters.\n * Full exposure of the underlying tool's arguments for maximum flexibility.\n * Containerized (Docker) to ensure consistent environments and manage dependencies, leading to enhanced reproducibility.\n * Unit tested to verify functionality and ensure reliability.\n", + "viash_version" : "0.9.4", "source" : "src", "target" : "target", "config_mods" : [ ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", - ".engines[.type == 'docker'].target_tag := 'main'" + ".engines[.type == 'docker'].target_tag := 'v0.1.0'" ], "keywords" : [ "toolbox", @@ -3513,7 +3744,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { // create process from temp file def binding = new nextflow.script.ScriptBinding([:]) def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) + def parser = _getScriptLoader(session) .setModule(true) .setBinding(binding) def moduleScript = parser.runScript(tempFile) @@ -3527,6 +3758,27 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { return scriptMeta.getProcess(procKey) } +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return 
scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. Is this a compatible Nextflow version?", e2) + } + } +} + // defaults meta["defaults"] = [ // key to be used to trace the process and determine output names @@ -3540,7 +3792,7 @@ meta["defaults"] = [ "container" : { "registry" : "images.viash-hub.com", "image" : "vsh/toolbox/yq", - "tag" : "main" + "tag" : "v0.1.0" }, "tag" : "$id" }'''), diff --git a/target/nextflow/yq/nextflow.config b/target/nextflow/yq/nextflow.config index bd4ddcb..aaef2c4 100644 --- a/target/nextflow/yq/nextflow.config +++ b/target/nextflow/yq/nextflow.config @@ -2,7 +2,7 @@ manifest { name = 'yq' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' - version = 'main' + version = 'v0.1.0' description = 'A portable YAML, JSON, XML, CSV, TOML and properties processor' } diff --git a/target/nextflow/yq/nextflow_schema.json b/target/nextflow/yq/nextflow_schema.json index 3d7ba38..5456163 100644 --- a/target/nextflow/yq/nextflow_schema.json +++ b/target/nextflow/yq/nextflow_schema.json @@ -40,7 +40,7 @@ "description": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `output.yaml`. output file", "help_text": "Type: `file`, required, default: `$id.$key.output.yaml`, example: `output.yaml`. 
output file" , - "default": "$id.$key.output.yaml" + "default":"$id.$key.output.yaml" } @@ -105,7 +105,7 @@ "description": "Type: `boolean_true`, default: `false`. pretty print, shorthand for \u0027", "help_text": "Type: `boolean_true`, default: `false`. pretty print, shorthand for \u0027... style = \"\"\u0027" , - "default": "False" + "default":false }