Build branch openpipeline_composed/add-integration-methods with version add-integration-methods to openpipeline_composed on branch add-integration-methods (b4f9d7f)

Build pipeline: vsh-ci-build-template-rcbnc Source commit: b4f9d7fdb0 Source message: rename
2025-09-22 10:20:11 +00:00
commit ddc301140b
371 changed files with 200082 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,26 @@
+# IDEs and editors
+/.idea
+.project
+.classpath
+*.launch
+.settings/
+.vscode
+
+# Temp
+gitignore
+test_results
+
+# System Files
+.DS_Store
+Thumbs.db
+
+# Nextflow
+work
+.nextflow*
+trace-*.txt
+
+# viash
+/resources_test/
+
+# pycache
+*__pycache__*
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,3 @@
+# openpipeline_runners x.x.x
+
+Initial release containing a single-cell meta-workflow to process single cell omics samples, perform batch integration and/or label projection.
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 openpipelines-bio
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -0,0 +1,24 @@
+viash_version: 0.9.4
+source: src
+target: target
+name: openpipeline_composed
+organization: vsh
+links:
+  repository: https://github.com/openpipelines-bio/openpipeline_composed
+  docker_registry: ghcr.io
+repositories:
+  - name: openpipeline
+    repo: openpipelines-bio/openpipeline
+    type: github
+    tag: 3.0.0
+info:
+  test_resources:
+    - type: s3
+      path: s3://openpipelines-bio/openpipeline_incubator/resources_test
+      dest: resources_test
+config_mods: |
+  .requirements.commands := ['ps']
+  .runners[.type == 'nextflow'].directives.tag := '$id'
+  .resources += {path: '/src/configs/labels.config', dest: 'nextflow_labels.config'}
+  .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")'
+version: add-integration-methods
--- a/main.nf
+++ b/main.nf
--- a/nextflow.config
+++ b/nextflow.config
--- a/resources_test_scripts/qc_sample_data.sh
+++ b/resources_test_scripts/qc_sample_data.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+
+# Output directories for the generated QC sample data and the spatial QC sample data
+OUT_DIR=resources_test/qc_sample_data
+OUT_DIR_SPATIAL=resources_test/spatial_qc_sample_data
+
+mkdir -p "$OUT_DIR" "$OUT_DIR_SPATIAL"  # -p makes this a no-op for directories that already exist
+
+# write the add_id parameter file; both samples point at the same source h5mu on s3
+cat > /tmp/params_create_h5mu.yaml <<EOF
+param_list:
+  - id: sample_one
+    input_id: sample_one
+    input: s3://openpipelines-data/10x_5k_anticmv/5k_human_antiCMV_T_TBNK_connect_qc.h5mu
+  - id: sample_two
+    input_id: sample_two
+    input: s3://openpipelines-data/10x_5k_anticmv/5k_human_antiCMV_T_TBNK_connect_qc.h5mu
+output: '\$id.qc.h5mu'
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+# add the sample ID to the mudata object (openpipeline add_id component, revision 2.1.2)
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.1.2 \
+  -main-script target/nextflow/metadata/add_id/main.nf \
+  -c src/configs/labels_ci.config \
+  -profile docker \
+  -params-file /tmp/params_create_h5mu.yaml \
+  -resume
+
+cat > /tmp/params_subset.yaml <<EOF
+param_list:
+  - id: sample_one
+    input: resources_test/qc_sample_data/sample_one.qc.h5mu
+  - id: sample_two
+    input: resources_test/qc_sample_data/sample_two.qc.h5mu
+output: '\$id.qc.h5mu'
+number_of_observations: 10000
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+# subset both h5mus (number_of_observations: 10000); outputs are published under the same file names
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.1.2 \
+  -main-script target/nextflow/filter/subset_h5mu/main.nf \
+  -c src/configs/labels_ci.config \
+  -profile docker \
+  -params-file /tmp/params_subset.yaml \
+  -resume
+
+cat > /tmp/add_metadata_obs.py <<EOF
+import mudata as mu
+import glob
+import numpy as np
+import pandas as pd
+import os
+
+# Directory containing the h5mu files
+out_dir = "$(pwd)/resources_test/qc_sample_data"
+
+# List of h5mu files
+h5mu_files = glob.glob(os.path.join(out_dir, "*.h5mu"))
+print(f"Found {len(h5mu_files)} h5mu files: {h5mu_files}")
+
+# Metadata values to randomly assign
+donor_ids = ["donor_1", "donor_2", "donor_3"]
+cell_types = ["CD4+ T cell", "CD8+ T cell", "B cell", "NK cell", "Monocyte"]
+batches = ["batch_A", "batch_B"]
+conditions = ["treated", "control"]
+
+for h5mu_file in h5mu_files:
+    print(f"Processing {h5mu_file}...")
+    
+    # Load MuData object
+    mdata = mu.read_h5mu(h5mu_file)
+    rna = mdata.mod["rna"]
+    n_obs = rna.n_obs
+    
+    # Generate random metadata
+    np.random.seed(42 + hash(h5mu_file) % 100)  # Different seed for each file but reproducible
+    
+    # Create metadata
+    rna.obs["donor_id"] = np.random.choice(donor_ids, size=n_obs)
+    rna.obs["cell_type"] = np.random.choice(cell_types, size=n_obs)
+    rna.obs["batch"] = np.random.choice(batches, size=n_obs)
+    rna.obs["condition"] = np.random.choice(conditions, size=n_obs)
+    
+    # Add a continuous variable too
+    rna.obs["quality_score"] = np.random.uniform(0, 1, size=n_obs)
+    
+    # Save the modified MuData object
+    mu.write_h5mu(h5mu_file, mdata)
+    print(f"Added metadata to {h5mu_file}")
+
+print("All files processed successfully!")
+EOF
+
+# Execute the Python script
+python /tmp/add_metadata_obs.py
+
+# generate cellbender out for testing
+cat > /tmp/params_cellbender.yaml <<EOF
+param_list:
+  - id: sample_one
+    input: resources_test/qc_sample_data/sample_one.qc.h5mu
+  - id: sample_two
+    input: resources_test/qc_sample_data/sample_two.qc.h5mu
+output: '\$id.qc.cellbender.h5mu'
+epochs: 5
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.1.2 \
+  -main-script target/nextflow/correction/cellbender_remove_background/main.nf \
+  -c src/configs/labels_ci.config \
+  -profile docker \
+  -params-file /tmp/params_cellbender.yaml \
+  -resume
+
+# fetch spatial sample data from s3
+aws s3 sync \
+  --profile di \
+  s3://openpipelines-bio/openpipeline_incubator/resources_test/spatial_qc_sample_data \
+  "$OUT_DIR_SPATIAL"
+
+# generate json for testing
+viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
+  --input "$OUT_DIR"/sample_one.qc.cellbender.h5mu \
+  --input "$OUT_DIR"/sample_two.qc.cellbender.h5mu \
+  --ingestion_method cellranger_multi \
+  --obs_metadata "donor_id;cell_type;batch;condition" \
+  --output "$OUT_DIR"/sc_dataset.json \
+  --output_reporting_json "$OUT_DIR"/sc_report_structure.json
+
+viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
+  --input "$OUT_DIR_SPATIAL"/xenium_tiny.qc.h5mu \
+  --input "$OUT_DIR_SPATIAL"/xenium_tiny.qc.h5mu \
+  --ingestion_method xenium \
+  --min_num_nonzero_vars 1 \
+  --output "$OUT_DIR_SPATIAL"/xenium_dataset.json \
+  --output_reporting_json "$OUT_DIR_SPATIAL"/xenium_report_structure.json
+
+# remove all state yaml files
+rm "$OUT_DIR"/*.yaml
+rm "$OUT_DIR_SPATIAL"/*.yaml
+
+# copy to s3
+aws s3 sync \
+  "$OUT_DIR" \
+  s3://openpipelines-bio/openpipeline_incubator/"$OUT_DIR" \
+  --delete \
+  --dryrun 
+
+
+aws s3 sync \
+  "$OUT_DIR_SPATIAL" \
+  s3://openpipelines-bio/openpipeline_incubator/"$OUT_DIR_SPATIAL" \
+  --delete \
+  --dryrun 
--- a/resources_test_scripts/spatial_qc_sample_data.sh
+++ b/resources_test_scripts/spatial_qc_sample_data.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+OUT_DIR=resources_test/spatial_qc_sample_data
+# -p makes this a no-op when the directory already exists
+mkdir -p "$OUT_DIR"
+
+# write the qc workflow parameter file for the xenium and cosmx tiny samples
+cat > /tmp/qc.yaml <<EOF
+param_list:
+  - id: xenium_tiny
+    input: s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium/xenium_tiny.h5mu
+  - id: Lung5_Rep2_tiny
+    input: s3://openpipelines-bio/openpipeline_spatial/resources_test/cosmx/Lung5_Rep2_tiny.h5mu
+var_name_mitochondrial_genes: mitochondrial
+var_name_ribosomal_genes: ribosomal
+output: '\$id.qc.h5mu'
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.1.0 \
+  -main-script target/nextflow/workflows/qc/qc/main.nf \
+  -profile docker \
+  -params-file /tmp/qc.yaml \
+  -resume \
+  -config src/configs/labels_ci.config
+
+# copy to s3 (only *.h5mu files are included; --delete and --dryrun are set)
+aws s3 sync \
+  --profile di \
+  resources_test/spatial_qc_sample_data \
+  s3://openpipelines-bio/openpipeline_incubator/resources_test/spatial_qc_sample_data \
+  --delete --dryrun \
+  --exclude "*" --include "*.h5mu" \
+  
--- a/src/authors/dorien_roosen.yaml
+++ b/src/authors/dorien_roosen.yaml
@@ -0,0 +1,11 @@
+name: Dorien Roosen
+info:
+  role: Core Team Member
+  links:
+    email: dorien@data-intuitive.com
+    github: dorien-er
+    linkedin: dorien-roosen
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Scientist
--- a/src/authors/jakub_majercik.yaml
+++ b/src/authors/jakub_majercik.yaml
@@ -0,0 +1,11 @@
+name: Jakub Majercik
+info:
+  role: Contributor
+  links:
+    email: jakub@data-intuitive.com
+    github: jakubmajercik
+    linkedin: jakubmajercik
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Bioinformatics Engineer
--- a/src/authors/robrecht_cannoodt.yaml
+++ b/src/authors/robrecht_cannoodt.yaml
@@ -0,0 +1,15 @@
+name: Robrecht Cannoodt
+info:
+  role: Core Team Member
+  links:
+    email: robrecht@data-intuitive.com
+    github: rcannood
+    orcid: "0000-0003-3641-729X"
+    linkedin: robrechtcannoodt
+  organizations:
+    - name: Data Intuitive
+      href: https://www.data-intuitive.com
+      role: Data Science Engineer
+    - name: Open Problems
+      href: https://openproblems.bio
+      role: Core Member
--- a/src/authors/weiwei_schultz.yaml
+++ b/src/authors/weiwei_schultz.yaml
@@ -0,0 +1,6 @@
+name: Weiwei Schultz
+info:
+  role: Contributor
+  organizations:
+    - name: Janssen R&D US
+      role: Associate Director Data Sciences
--- a/src/configs/integration_tests.config
+++ b/src/configs/integration_tests.config
@@ -0,0 +1,36 @@
+profiles {
+
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+}
--- a/src/configs/labels.config
+++ b/src/configs/labels.config
@@ -0,0 +1,66 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = null
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
+
+// Clamp a requested memory amount to process.maxMemory when that cap is configured.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare  // no cap configured: pass the request through unchanged
+  }
+  try {
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory  // attempt number equals maxRetries: hand out the cap itself
+    }
+    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
+      return process.maxMemory as nextflow.util.MemoryUnit  // request exceeds the cap (was the undefined name max_memory)
+    }
+    else {
+      return to_compare
+    }
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}
--- a/src/configs/labels_ci.config
+++ b/src/configs/labels_ci.config
@@ -0,0 +1,105 @@
+process {
+  withLabel: lowmem { memory = 13.Gb }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midmem { memory = 13.Gb }
+  withLabel: midcpu { cpus = 4 }
+  withLabel: highmem { memory = 13.Gb }
+  withLabel: highcpu { cpus = 4 }
+  withLabel: veryhighmem { memory = 13.Gb }
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+}
+
+env.NUMBA_CACHE_DIR = '/tmp'
+
+trace {
+    enabled = true
+    overwrite = true
+}
+dag {
+  overwrite = true
+}
+
+process.maxForks = 1
+
+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Clamp a requested memory amount to process.maxMemory when that cap is configured.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare  // no cap configured: pass the request through unchanged
+  }
+  try {
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory  // attempt number equals maxRetries: hand out the cap itself
+    }
+    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
+      return process.maxMemory as nextflow.util.MemoryUnit  // request exceeds the cap (was the undefined name max_memory)
+    }
+    else {
+      return to_compare
+    }
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}
--- a/src/single_cell/process_integrate_annotate/config.vsh.yaml
+++ b/src/single_cell/process_integrate_annotate/config.vsh.yaml
@@ -0,0 +1,385 @@
+name: "process_integrate_annotate"
+namespace: "single_cell"
+description: |
+  A pipeline to process, integrate and annotate single cell (multi-)omics data.
+  Available integration methods:
+  - Harmony
+  - scVI
+  Available annotation methods:
+  - CellTypist
+  - scANVI (with scArches)
+
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ author, maintainer ]
+  - __merge__: /src/authors/weiwei_schultz.yaml
+    roles: [ contributor ]
+
+argument_groups:
+  - name: Input (query) data arguments
+    description: The input query dataset(s) to be annotated
+    arguments:
+      - name: "--id"
+        required: true
+        type: string
+        description: ID of the sample.
+        example: foo
+      - name: "--input"
+        required: true
+        type: file
+        description: Input query dataset(s) to be annotated
+        example: input.h5mu
+      - name: "--modality"
+        default: "rna"
+        type: string
+        description: Modality to be processed. Should match the modality in the --reference dataset, if provided.
+      - name: "--input_layer"
+        type: string
+        description: "The layer in the input data containing the raw counts, if .X is not to be used."
+        required: false
+      - name: "--input_var_gene_names"
+        type: string
+        required: false
+        description: |
+          The name of the adata var column containing gene names; when no gene_name_layer is provided, the var index will be used.
+      - name: "--input_reference_gene_overlap"
+        type: integer
+        default: 100
+        min: 1
+        description: | 
+          The minimum number of genes present in both the reference and query datasets.
+
+  - name: Reference data arguments
+    description: Dataset to be used as a reference for label transfer and to train annotation algorithms on.
+    arguments:
+      - name: "--reference"
+        type: file
+        required: false
+        example: reference.h5mu
+        description: |
+          The reference dataset in .h5mu format to be used as a reference mapper and to train annotation algorithms on.
+      - name: "--reference_layer_raw_counts"
+        type: string
+        description: "The layer in the reference dataset containing the raw counts, if .X is not to be used."
+        required: false
+      - name: "--reference_layer_lognormalized_counts"
+        type: string
+        default: log_normalized
+        description: "The layer in the reference dataset containing the log-normalized counts, if .X is not to be used."
+      - name: "--reference_var_gene_names"
+        type: string
+        required: false
+        description: |
+          The name of the adata .var column containing gene names if the .var index is not to be used.
+      - name: "--reference_obs_batch"
+        type: string
+        required: false 
+        description: |
+          The .obs column of the reference dataset containing the batch information.
+      - name: "--reference_obs_label"
+        type: string
+        example: cell_type
+        required: false
+        description: The `.obs` key of the target labels to transfer.
+      - name: "--reference_obs_label_unlabeled_category"
+        type: string
+        default: "Unkown"
+        description: "Value in the --reference_obs_label field that indicates unlabeled observations"
+      - name: "--reference_var_input"
+        type: string
+        required: false
+        description: |
+          .var column containing highly variable genes. By default, do not subset genes.
+
+
+  - name: Methods
+    description: The available annotation and integration methods to integrate and/or annotate the query dataset(s) with.
+    arguments:
+      - name: "--integration_methods"
+        type: string
+        multiple: true
+        required: false
+        choices: [harmony, scvi]
+        example: harmony;scvi
+        description: Integration methods to be executed.
+      - name: "--annotation_methods"
+        type: string
+        multiple: true
+        required: false
+        choices: [celltypist, scanvi_scarches]
+        example: celltypist;scanvi_scarches
+        description: Annotation methods to be executed.
+        
+  - name: "Pre-processing options: RNA filtering"
+    description: Pre-processing options for filtering RNA data
+    arguments:
+      - name: "--rna_min_counts"
+        example: 200
+        type: integer
+        description: Minimum number of counts captured per cell.
+      - name: "--rna_max_counts"
+        example: 5000000
+        type: integer
+        description: Maximum number of counts captured per cell.
+      - name: "--rna_min_genes_per_cell"
+        type: integer
+        example: 200
+        description: Minimum of non-zero values per cell.
+      - name: "--rna_max_genes_per_cell"
+        example: 1500000
+        type: integer
+        description: Maximum of non-zero values per cell.
+      - name: "--rna_min_cells_per_gene"
+        example: 3
+        type: integer
+        description: Minimum of non-zero values per gene.
+      - name: "--rna_min_fraction_mito"
+        example: 0
+        type: double
+        description: Minimum fraction of UMIs that are mitochondrial.
+      - name: "--rna_max_fraction_mito"
+        type: double
+        example: 0.2
+        description: Maximum fraction of UMIs that are mitochondrial.
+
+  - name: "Pre-processing options: Highly variable features detection"
+    description: Pre-processing options for detecting highly variable features
+    arguments:
+      - name: "--n_hvg"
+        type: integer
+        description: |
+          Number of highly-variable features to keep. 
+          Only relevant if HVG need to be calculated across query and reference datasets (e.g. for --annotation_methods scvi_knn and harmony_knn). 
+          For reference mapping-based methods, the HVG's specified in --reference_var_input will be used.
+        default: 2000
+  
+  - name: "Pre-processing options: Mitochondrial & Ribosomal Gene Detection"
+    description: Pre-processing options for detecting mitochondrial genes
+    arguments:
+      - name: "--var_name_mitochondrial_genes"
+        type: string
+        required: false
+        description: |
+          In which .var slot to store a boolean array corresponding to the mitochondrial genes.
+      - name: "--var_name_ribosomal_genes"
+        type: string
+        required: false
+        description: |
+          In which .var slot to store a boolean array corresponding to the ribosomal genes.
+      - name: "--obs_name_mitochondrial_fraction"
+        type: string
+        required: false
+        description: |
+          When specified, write the fraction of counts originating from mitochondrial genes 
+          (based on --mitochondrial_gene_regex) to an .obs column with the specified name.
+          Requires --var_name_mitochondrial_genes.
+      - name: "--obs_name_ribosomal_fraction"
+        type: string
+        required: false
+        description: |
+          When specified, write the fraction of counts originating from ribosomal genes 
+          (based on --ribosomal_gene_regex) to an .obs column with the specified name.
+          Requires --var_name_ribosomal_genes.
+      - name: --mitochondrial_gene_regex
+        type: string
+        description: |
+          Regex string that identifies mitochondrial genes from --var_gene_names.
+          By default will detect human and mouse mitochondrial genes from a gene symbol.
+        required: false
+        default: "^[mM][tT]-"
+      - name: --ribosomal_gene_regex
+        type: string
+        description: |
+          Regex string that identifies ribosomal genes from --var_gene_names.
+          By default will detect human and mouse ribosomal genes from a gene symbol.
+        required: false
+        default: "^[Mm]?[Rr][Pp][LlSs]"
+
+  - name: "Pre-processing options: QC metrics calculation options"
+    description: Pre-processing options for calculating QC metrics
+    arguments:
+      - name: "--var_qc_metrics"
+        description: |
+          Keys to select a boolean (containing only True or False) column from .var.
+          For each cell, calculate the proportion of total values for genes which are labeled 'True', 
+          compared to the total sum of the values for all genes. Defaults to the combined values specified for
+          --var_name_mitochondrial_genes and --highly_variable_features_var_output.
+        type: string
+        multiple: True
+        multiple_sep: ','
+        required: false
+        example: "ercc,highly_variable"
+
+  - name: Harmony integration options
+    description: Specifications for harmony integration.
+    arguments:
+      - name: "--harmony_theta"
+        type: double
+        description: |
+          Diversity clustering penalty parameter. Specify for each variable in group.by.vars. 
+          theta=0 does not encourage any diversity. Larger values of theta
+          result in more diverse clusters.
+        default: 2
+        example: [0, 1, 2]
+        multiple: true
+      - name: "--harmony_obs_covariates"
+        type: string
+        description: "The .obs field(s) that define the covariate(s) to regress out."
+        example: ["batch", "sample"]
+        required: true
+        multiple: true
+        default: "sample_id"
+
+  - name: scVI, scANVI and scArches training options
+    # TODO - possibly provide separate training options for scVI, scANVI and scArches
+    description: Training arguments for scVI, scANVI and scArches. Relevant for --annotation_methods 'scvi_knn' and 'scanvi_scarches'.
+    arguments:
+      - name: "--early_stopping"
+        required: false
+        type: boolean
+        description: "Whether to perform early stopping with respect to the validation set."
+      - name: "--early_stopping_monitor"
+        choices: ["elbo_validation", "reconstruction_loss_validation", "kl_local_validation"]
+        default: "elbo_validation"
+        type: string
+        description: "Metric logged during validation set epoch."
+      - name: "--early_stopping_patience"
+        type: integer
+        min: 1
+        default: 45
+        description: "Number of validation epochs with no improvement after which training will be stopped."
+      - name: "--early_stopping_min_delta"
+        min: 0
+        type: double
+        default: 0.0
+        description: "Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement."
+      - name: "--max_epochs"
+        type: integer
+        description: "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest."
+        required: false
+      - name: "--reduce_lr_on_plateau"
+        description: "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus."
+        type: boolean
+        default: True
+      - name: "--lr_factor"
+        description: "Factor to reduce learning rate."
+        type: double
+        default: 0.6
+        min: 0
+      - name: "--lr_patience"
+        description: "Number of epochs with no improvement after which learning rate will be reduced."
+        type: double
+        default: 30
+        min: 0
+
+  - name: CellTypist reference model
+    description: The CellTypist reference model to use for annotation. If not provided, the reference dataset will be used for model training.
+    arguments:
+      - name: "--celltypist_model"
+        type: file
+        description: "Pretrained model in pkl format. If not provided, the model will be trained on the reference data and --reference should be provided."
+        required: false
+        example: pretrained_model.pkl
+
+  - name: CellTypist annotation options
+    description: Specifications for CellTypist annotation.
+    arguments:
+      - name: "--celltypist_feature_selection"
+        type: boolean
+        description: "Whether to perform feature selection."
+        default: false
+      - name: "--celltypist_majority_voting"
+        type: boolean
+        description: "Whether to refine the predicted labels by running the majority voting classifier after over-clustering."
+        default: false
+      - name: "--celltypist_C"
+        type: double
+        description: "Inverse of regularization strength in logistic regression."
+        default: 1.0
+      - name: "--celltypist_max_iter"
+        type: integer
+        description: "Maximum number of iterations before reaching the minimum of the cost function."
+        default: 1000
+      - name: "--celltypist_use_SGD"
+        type: boolean_true
+        description: "Whether to use the stochastic gradient descent algorithm."
+      - name: "--celltypist_min_prop"
+        type: double
+        description: |
+          For the dominant cell type within a subcluster, the minimum proportion of cells required to
+          support naming of the subcluster by this cell type. Ignored if majority_voting is set to False.
+          A subcluster that fails to pass this proportion threshold will be assigned 'Heterogeneous'.
+        default: 0
+
+  - name: Clustering options
+    description: Arguments for Leiden clustering. Only relevant for --annotation_methods `scvi_knn`, `scanvi_scarches` and `harmony_knn`.
+    arguments:
+      - name: "--leiden_resolution"
+        type: double
+        description: Control the coarseness of the clustering. Higher values lead to more clusters.
+        default: [1]
+        multiple: true
+
+  - name: Neighbor classifier arguments
+    description: Arguments related to calculating the n nearest neighbors. Only relevant for --annotation_methods `scvi_knn`, `scanvi_scarches` and `harmony_knn`.
+    arguments:
+      - name: "--knn_weights"
+        type: string
+        default: "uniform"
+        choices: ["uniform", "distance"]
+        description: |
+          Weight function used in prediction. Possible values are:
+          `uniform` (all points in each neighborhood are weighted equally) or 
+          `distance` (weight points by the inverse of their distance)
+      - name: "--knn_n_neighbors"
+        type: integer
+        default: 15
+        min: 5
+        required: false
+        description: |
+          The number of neighbors to use in k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent. 
+          Larger values will result in more accurate search results at the cost of computation time.
+
+  - name: Outputs
+    description: The output file to write the annotated dataset to.
+    arguments:
+    - name: "--output"
+      type: file
+      direction: output
+      required: true
+      description: |
+        The output file.
+      example: output.h5mu
+
+dependencies:
+  - name: workflows/multiomics/process_samples
+    alias: process_samples_workflow
+    repository: openpipeline
+  - name: annotate/celltypist
+    repository: openpipeline
+    alias: celltypist_annotation
+  - name: workflows/annotation/scanvi_scarches
+    repository: openpipeline
+    alias: scanvi_scarches_annotation
+  - name: workflows/integration/harmony_leiden
+    repository: openpipeline
+    alias: harmony_integration
+  - name: workflows/integration/scvi_leiden
+    repository: openpipeline
+    alias: scvi_integration
+
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+
+test_resources:
+  - type: nextflow_script
+    path: test.nf
+    entrypoint: test_wf
+  - path: /resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu
+  - path: /resources_test/annotation_test_data/TS_Blood_filtered.h5mu
+  - path: /resources_test/annotation_test_data/celltypist_model_Immune_All_Low.pkl
+
+runners:
+  - type: nextflow
--- a/src/single_cell/process_integrate_annotate/integration_test.sh
+++ b/src/single_cell/process_integrate_annotate/integration_test.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+nextflow \
+  run . \
+  -main-script src/single_cell/process_integrate_annotate/test.nf \
+  -entry test_wf \
+  -resume \
+  -profile docker \
+  -c src/configs/labels_ci.config \
+  -c src/configs/integration_tests.config \
+  --publish_dir test
+
+nextflow \
+  run . \
+  -main-script src/single_cell/process_integrate_annotate/test.nf \
+  -profile docker,no_publish \
+  -resume \
+  -entry test_wf_2 \
+  -c src/configs/labels_ci.config \
+  -c src/configs/integration_tests.config
+
+nextflow \
+  run . \
+  -main-script src/single_cell/process_integrate_annotate/test.nf \
+  -profile docker,no_publish \
+  -resume \
+  -entry test_wf_3 \
+  -c src/configs/labels_ci.config \
+  -c src/configs/integration_tests.config
--- a/src/single_cell/process_integrate_annotate/main.nf
+++ b/src/single_cell/process_integrate_annotate/main.nf
@@ -0,0 +1,210 @@
+// Meta-workflow that processes the query samples and then optionally runs the
+// selected integration methods (harmony, scvi) and annotation methods
+// (celltypist, scanvi_scarches). Every step reads its input from the
+// `query_processed` state key and writes its output back to that key, so the
+// enabled steps are chained on the same dataset.
+//
+// take: input_ch  - channel of [id, state] tuples.
+// emit: output_ch - channel of [id, state] tuples; the state is reduced to
+//                   `output` (the last `query_processed` file) and `_meta`.
+//
+// Throws a RuntimeException when neither --annotation_methods nor
+// --integration_methods is set, or when celltypist is selected without a
+// --celltypist_model or a --reference.
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+    output_ch = input_ch
+    // Store the incoming event ID in `_meta.join_id` before any component runs.
+    | map { id, state ->
+      def new_state = state + [ "query_processed": state.output, "_meta": ["join_id": id] ]
+      [id, new_state]
+    }
+    // Make sure parameters are filled out correctly
+    | map { id, state ->
+      def uses_celltypist = state.annotation_methods && state.annotation_methods.contains("celltypist")
+
+      // Check that at least one of annotation_methods or integration_methods is not empty
+      if (!state.annotation_methods && !state.integration_methods) {
+        throw new RuntimeException("At least one of --annotation_methods or --integration_methods must be provided")
+      }
+      // Check CellTypist arguments
+      if (uses_celltypist && !state.celltypist_model && !state.reference) {
+        throw new RuntimeException("Celltypist was selected as an annotation method. Either --celltypist_model or --reference must be provided.")
+      }
+      if (uses_celltypist && state.celltypist_model && state.reference) {
+        // Concatenate the two sentences instead of using a backslash line
+        // continuation, which would embed the source indentation in the message.
+        System.err.println(
+          "Warning: --celltypist_model is set and a --reference was provided. " +
+          "The pre-trained Celltypist model will be used for annotation, the reference will be ignored."
+        )
+      }
+      // NOTE(review): scanvi_scarches also reads `reference` from the state but
+      // no check is done here - confirm whether it should be validated as well.
+
+      [id, state]
+    }
+    // Process the query samples; always runs.
+    | process_samples_workflow.run(
+      fromState: [
+        "input": "input",
+        "id": "id",
+        "rna_layer": "input_layer",
+        "rna_min_counts": "rna_min_counts",
+        "rna_max_counts": "rna_max_counts",
+        "rna_min_genes_per_cell": "rna_min_genes_per_cell",
+        "rna_max_genes_per_cell": "rna_max_genes_per_cell",
+        "rna_min_cells_per_gene": "rna_min_cells_per_gene",
+        "rna_min_fraction_mito": "rna_min_fraction_mito",
+        "rna_max_fraction_mito": "rna_max_fraction_mito",
+        "rna_min_fraction_ribo": "rna_min_fraction_ribo",
+        "rna_max_fraction_ribo": "rna_max_fraction_ribo",
+        "var_name_mitochondrial_genes": "var_name_mitochondrial_genes",
+        "var_name_ribosomal_genes": "var_name_ribosomal_genes",
+        "var_gene_names": "input_var_gene_names",
+        "mitochondrial_gene_regex": "mitochondrial_gene_regex",
+        "ribosomal_gene_regex": "ribosomal_gene_regex",
+        "var_qc_metrics": "var_qc_metrics"
+      ],
+      args: [
+        "pca_overwrite": "true",
+        // The same names are passed to later steps: `sample_id` as batch key
+        // and `filter_with_hvg_query` as the scVI feature selection.
+        "add_id_obs_output": "sample_id",
+        "highly_variable_features_var_output": "filter_with_hvg_query"
+      ],
+      toState: ["query_processed": "output"]
+    )
+    // Integration methods
+    | harmony_integration.run(
+      runIf: { id, state ->
+        state.integration_methods && state.integration_methods.contains("harmony")
+      },
+      fromState: [
+        "id": "id",
+        "input": "query_processed",
+        "modality": "modality",
+        "theta": "harmony_theta",
+        "leiden_resolution": "leiden_resolution",
+        "obs_covariates": "harmony_obs_covariates"
+      ],
+      args: [
+        "layer": "log_normalized",
+        "embedding": "X_pca",
+        "obsm_integrated": "X_harmony_integrated",
+        "uns_neighbors": "harmony_integration_neighbors",
+        "obsp_neighbor_distances": "harmony_integration_neighbor_distances",
+        "obsp_neighbor_connectivities": "harmony_integration_neighbor_connectivities",
+        "obs_cluster": "harmony_integration_leiden",
+        "obsm_umap": "X_harmony_umap"
+      ],
+      toState: [ "query_processed": "output" ]
+    )
+
+    | scvi_integration.run(
+      runIf: { id, state ->
+        state.integration_methods && state.integration_methods.contains("scvi")
+      },
+      fromState: [
+        "id": "id",
+        "input": "query_processed",
+        "layer": "input_layer",
+        "modality": "modality",
+        "leiden_resolution": "leiden_resolution",
+        "early_stopping": "early_stopping",
+        "early_stopping_monitor": "early_stopping_monitor",
+        "early_stopping_patience": "early_stopping_patience",
+        "early_stopping_min_delta": "early_stopping_min_delta",
+        "max_epochs": "max_epochs",
+        "reduce_lr_on_plateau": "reduce_lr_on_plateau",
+        "lr_factor": "lr_factor",
+        "lr_patience": "lr_patience"
+      ],
+      args: [
+        "obsm_output": "X_scvi_integrated",
+        "obs_batch": "sample_id",
+        "var_input": "filter_with_hvg_query",
+        "uns_neighbors": "scvi_integration_neighbors",
+        "obsp_neighbor_distances": "scvi_integration_neighbor_distances",
+        "obsp_neighbor_connectivities": "scvi_integration_neighbor_connectivities",
+        "obs_cluster": "scvi_integration_leiden",
+        "obsm_umap": "X_scvi_umap"
+      ],
+      toState: [ "query_processed": "output", "scvi_model": "output_model" ]
+    )
+
+    // Annotation methods
+    // CellTypist with a pre-trained model (takes precedence over a reference).
+    | celltypist_annotation.run(
+      runIf: { id, state -> state.annotation_methods && state.annotation_methods.contains("celltypist") && state.celltypist_model },
+      fromState: [
+        "input": "query_processed",
+        "modality": "modality",
+        "input_var_gene_names": "input_var_gene_names",
+        "input_reference_gene_overlap": "input_reference_gene_overlap",
+        "model": "celltypist_model",
+        "majority_voting": "celltypist_majority_voting"
+      ],
+      args: [
+        // log normalized counts are expected for celltypist
+        "input_layer": "log_normalized",
+        "output_obs_predictions": "celltypist_pred",
+        "output_obs_probability": "celltypist_proba"
+      ],
+      toState: [ "query_processed": "output" ]
+    )
+
+    // CellTypist with a reference dataset; only runs when no model was given.
+    | celltypist_annotation.run(
+      runIf: { id, state -> state.annotation_methods && state.annotation_methods.contains("celltypist") && !state.celltypist_model },
+      fromState: [
+        "input": "query_processed",
+        "modality": "modality",
+        "input_var_gene_names": "input_var_gene_names",
+        "input_reference_gene_overlap": "input_reference_gene_overlap",
+        "reference": "reference",
+        "reference_layer": "reference_layer_lognormalized_counts",
+        "reference_obs_target": "reference_obs_label",
+        "reference_var_gene_names": "reference_var_gene_names",
+        "reference_obs_batch": "reference_obs_batch",
+        "reference_var_input": "reference_var_input",
+        "feature_selection": "celltypist_feature_selection",
+        "C": "celltypist_C",
+        "max_iter": "celltypist_max_iter",
+        "use_SGD": "celltypist_use_SGD",
+        "min_prop": "celltypist_min_prop",
+        "majority_voting": "celltypist_majority_voting"
+      ],
+      args: [
+        // log normalized counts are expected for celltypist
+        "input_layer": "log_normalized",
+        "output_obs_predictions": "celltypist_pred",
+        "output_obs_probability": "celltypist_proba"
+      ],
+      toState: [ "query_processed": "output" ]
+    )
+
+    | scanvi_scarches_annotation.run(
+      runIf: { id, state -> state.annotation_methods && state.annotation_methods.contains("scanvi_scarches") },
+      fromState: [
+        "id": "id",
+        "input": "query_processed",
+        "modality": "modality",
+        "layer": "input_layer",
+        "input_var_gene_names": "input_var_gene_names",
+        "reference": "reference",
+        "reference_obs_target": "reference_obs_label",
+        "reference_obs_batch_label": "reference_obs_batch",
+        "reference_var_hvg": "reference_var_input",
+        "reference_var_gene_names": "reference_var_gene_names",
+        "unlabeled_category": "reference_obs_label_unlabeled_category",
+        "early_stopping": "early_stopping",
+        "early_stopping_monitor": "early_stopping_monitor",
+        "early_stopping_patience": "early_stopping_patience",
+        "early_stopping_min_delta": "early_stopping_min_delta",
+        "max_epochs": "max_epochs",
+        "reduce_lr_on_plateau": "reduce_lr_on_plateau",
+        "lr_factor": "lr_factor",
+        "lr_patience": "lr_patience",
+        "leiden_resolution": "leiden_resolution",
+        "knn_weights": "knn_weights",
+        "knn_n_neighbors": "knn_n_neighbors"
+      ],
+      args: [
+        "input_obs_batch_label": "sample_id",
+        "output_obs_predictions": "scanvi_knn_pred",
+        "output_obs_probability": "scanvi_knn_proba"
+      ],
+      toState: [ "query_processed": "output" ]
+    )
+
+    // Expose the result of the last executed step as the workflow output.
+    | map { id, state ->
+      def new_state = state + ["output": state.query_processed]
+      [id, new_state]
+    }
+
+    // Drop every state key except `output` and `_meta`.
+    | setState(["output", "_meta"])
+
+  emit:
+    output_ch
+}
--- a/src/single_cell/process_integrate_annotate/nextflow.config
+++ b/src/single_cell/process_integrate_annotate/nextflow.config
@@ -0,0 +1,10 @@
+// Nextflow configuration used when running this workflow's test script.
+manifest {
+  // Minimum required Nextflow version; the leading `!` makes the requirement strict.
+  nextflowVersion = '!>=20.12.1-edge'
+}
+
+params {
+  // Absolute, normalized path three directory levels above $projectDir.
+  // test.nf and the includeConfig below build their paths from it.
+  rootDir = java.nio.file.Paths.get("$projectDir/../../../").toAbsolutePath().normalize().toString()
+}
+
+// include common settings
+includeConfig("${params.rootDir}/src/configs/labels.config")
--- a/src/single_cell/process_integrate_annotate/test.nf
+++ b/src/single_cell/process_integrate_annotate/test.nf
@@ -0,0 +1,151 @@
+nextflow.enable.dsl=2
+
+// Load the workflow under test from the target/ directory below params.rootDir.
+include { process_integrate_annotate } from params.rootDir + "/target/nextflow/single_cell/process_integrate_annotate/main.nf"
+// Base location that the test workflows resolve their input files against.
+params.resources_test = "s3://openpipelines-bio/openpipeline_incubator/resources_test/"
+
+// Runs the workflow on three cases: annotation only, integration only, and
+// one annotation method combined with one integration method.
+workflow test_wf {
+  resources_test = file(params.resources_test)
+
+  def query_file = resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu")
+
+  // Reference settings shared by the cases that run an annotation method.
+  def reference_settings = [
+    reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"),
+    reference_var_gene_names: "ensemblid",
+    reference_layer_lognormalized_counts: "log_normalized",
+    reference_obs_batch: "donor_assay",
+    reference_obs_label: "cell_type",
+    max_epochs: "5"
+  ]
+
+  def test_cases = [
+    [id: "simple_annotation_test", input: query_file]
+      + reference_settings
+      + [annotation_methods: "celltypist;scanvi_scarches"],
+    [id: "simple_integration_test", input: query_file, integration_methods: "harmony;scvi"],
+    [id: "simple_execution_test", input: query_file]
+      + reference_settings
+      + [annotation_methods: "scanvi_scarches", integration_methods: "harmony"]
+  ]
+
+  output_ch = Channel.fromList(test_cases)
+    | view { "State at start: $it" }
+    | map { state -> [state.id, state] }
+    | process_integrate_annotate
+    | view { "After AaaS: $it" }
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      def (id, state) = output
+
+      // check id
+      assert id == "merged" : "Output ID should be `merged`"
+
+      // check output
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "Output should contain key 'output'."
+      assert state.output.isFile() : "'output' should be a file."
+      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"
+
+      "Output: $output"
+    }
+}
+
+// Runs the workflow on a single case that sets explicit filtering parameters
+// and combines celltypist annotation (from a reference) with scvi integration.
+workflow test_wf_2 {
+  resources_test = file(params.resources_test)
+
+  def test_cases = [
+    [
+      id: "pbmc_with_more_params",
+      input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
+      // RNA filtering
+      rna_min_counts: 2,
+      rna_max_counts: 1000000,
+      rna_min_genes_per_cell: 1,
+      rna_max_genes_per_cell: 1000000,
+      rna_min_cells_per_gene: 1,
+      rna_min_fraction_mito: 0.0,
+      rna_max_fraction_mito: 1.0,
+      // Protein filtering
+      prot_min_counts: 3,
+      prot_max_counts: 1000000,
+      prot_min_proteins_per_cell: 1,
+      prot_max_proteins_per_cell: 1000000,
+      prot_min_cells_per_protein: 1,
+      var_name_mitochondrial_genes: 'mitochondrial',
+      obs_name_mitochondrial_fraction: 'fraction_mitochondrial',
+      add_id_to_obs: true,
+      add_id_make_observation_keys_unique: true,
+      add_id_obs_output: "sample_id",
+      // Reference used for annotation
+      reference: resources_test.resolve("annotation_test_data/TS_Blood_filtered.h5mu"),
+      reference_var_gene_names: "ensemblid",
+      reference_layer_lognormalized_counts: "log_normalized",
+      reference_obs_batch: "donor_assay",
+      reference_obs_label: "cell_type",
+      annotation_methods: "celltypist",
+      integration_methods: "scvi"
+    ]
+  ]
+
+  output_ch = Channel.fromList(test_cases)
+    | view { "State at start: $it" }
+    | map { state -> [state.id, state] }
+    | process_integrate_annotate
+    | view { "After AaaS: $it" }
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      def (id, state) = output
+
+      // check id
+      assert id == "merged" : "Output ID should be `merged`"
+
+      // check output
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "Output should contain key 'output'."
+      assert state.output.isFile() : "'output' should be a file."
+      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"
+
+      "Output: $output"
+    }
+}
+
+// Runs the workflow on a single case that annotates with a CellTypist model
+// file instead of a reference dataset.
+workflow test_wf_3 {
+  resources_test = file(params.resources_test)
+
+  def test_cases = [
+    [
+      id: "celltypist_model",
+      input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
+      celltypist_model: resources_test.resolve("annotation_test_data/celltypist_model_Immune_All_Low.pkl"),
+      annotation_methods: "celltypist",
+      input_var_gene_names: "gene_symbol"
+    ]
+  ]
+
+  output_ch = Channel.fromList(test_cases)
+    | view { "State at start: $it" }
+    | map { state -> [state.id, state] }
+    | process_integrate_annotate
+    | view { "After AaaS: $it" }
+    | view { output ->
+      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"
+
+      def (id, state) = output
+
+      // check id
+      assert id == "merged" : "Output ID should be `merged`"
+
+      // check output
+      assert state instanceof Map : "State should be a map. Found: ${state}"
+      assert state.containsKey("output") : "Output should contain key 'output'."
+      assert state.output.isFile() : "'output' should be a file."
+      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"
+
+      "Output: $output"
+    }
+}
--- a/target/.build.yaml
+++ b/target/.build.yaml
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/.config.vsh.yaml
@@ -0,0 +1,228 @@
+# Built component configuration of the `split_modalities` workflow
+# (see `build_info` further down for the source config and build details).
+name: "split_modalities"
+namespace: "workflows/multiomics"
+version: "3.0.0"
+# Author and maintainer metadata.
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+# Workflow interface: two required inputs (sample ID and sample file) and two
+# required outputs (a directory of h5mu files and a csv describing them).
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "string"
+    name: "--id"
+    description: "ID of the sample."
+    info: null
+    example:
+    - "foo"
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Path to the sample."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  # Output directory rather than a single file.
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output directory containing multiple h5mu files."
+    info: null
+    example:
+    - "/path/to/output"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  # Index of the files written to --output.
+  - type: "file"
+    name: "--output_types"
+    description: "A csv containing the base filename and modality type per output\
+      \ file."
+    info: null
+    example:
+    - "types.csv"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+# Files shipped with the workflow: the entry script, the utils directory and
+# the label configuration that the runner script includes.
+resources:
+- type: "nextflow_script"
+  path: "main.nf"
+  is_executable: true
+  entrypoint: "run_wf"
+- type: "file"
+  path: "utils"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "A pipeline to split a multimodal mudata files into several unimodal\
+  \ mudata files."
+# Test script and the data file it uses.
+test_resources:
+- type: "nextflow_script"
+  path: "test.nf"
+  is_executable: true
+  entrypoint: "test_wf"
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
+info:
+  test_dependencies:
+  - name: "split_modalities_test"
+    namespace: "test_workflows/multiomics"
+status: "enabled"
+# Both the image and the build target of this workflow are marked private.
+scope:
+  image: "private"
+  target: "private"
+# The workflow wraps a single component taken from the same (local) repository.
+dependencies:
+- name: "dataflow/split_modalities"
+  alias: "split_modalities_component"
+  repository:
+    type: "local"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+# Nextflow runner settings, including the table of resource labels.
+runners:
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    # Label name -> process directive. `mem<N>gb/tb` labels use decimal byte
+    # counts, `mem<N>gib/tib` labels use powers of two; `cpu<N>` sets the CPU count.
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    # Extra config line; it pulls in the nextflow_labels.config resource.
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+# Build provenance: source config, runner/engine, output location, Viash
+# version and the git state the component was built from.
+build_info:
+  config: "src/workflows/multiomics/split_modalities/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "native"
+  output: "target/_private/nextflow/workflows/multiomics/split_modalities"
+  executable: "target/_private/nextflow/workflows/multiomics/split_modalities/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+  dependencies:
+  - "target/nextflow/dataflow/split_modalities"
+# Package-level settings of the `openpipeline` package this component belongs to.
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calulations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  # Config modifications: the first adds labels.config as the
+  # `nextflow_labels.config` resource and sets the nextflow runner's config
+  # script to include it; the second sets the version to 3.0.0.
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/nextflow.config
@@ -0,0 +1,126 @@
+// Pipeline metadata for the split_modalities workflow.
+manifest {
+  name = 'workflows/multiomics/split_modalities'
+  mainScript = 'main.nf'
+  // Minimum required Nextflow version; the leading `!` makes the requirement strict.
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'A pipeline to split a multimodal mudata files into several unimodal mudata files.'
+  author = 'Dries Schaumont'
+}
+
+// Container image applied to all processes through the process scope.
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+// Uses the first of NXF_TEMP, VIASH_TEMP, TEMPDIR and TMPDIR that has a
+// non-empty value and falls back to /tmp; the result is made absolute.
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+// Execution profiles. The container profiles (docker, singularity, podman,
+// shifter, charliecloud) each enable one engine and explicitly disable the
+// other four.
+profiles {
+  // Turns off publishDir for every process (the name pattern matches all).
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  // Sets the `temp` option of docker, podman and charliecloud to tempDir.
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+// Fixed resource labels: `mem<N>gb/tb` use decimal byte counts,
+// `mem<N>gib/tib` use powers of two, `cpu<N>` sets the number of CPUs.
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+// Included last, after all settings above.
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/nextflow_labels.config
@@ -0,0 +1,48 @@
+// Process defaults and the generic CPU / memory / disk labels used by components.
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  // (exit status 137 to 140); any other failure terminates the run.
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  // Each label requests <base> * attempt. When a memory resource limit is set
+  // and the attempt number has reached maxRetries, the limit itself is requested.
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  // All four labels pass through `process.disk` when it is set, else null.
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/errorstrat_ignore.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/errorstrat_ignore.config
@@ -0,0 +1 @@
+// Optional override: sets the error strategy of all processes to 'ignore'.
+process.errorStrategy = 'ignore' 
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/integration_tests.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/integration_tests.config
@@ -0,0 +1,36 @@
+profiles {
+
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/labels_ci.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/_private/nextflow/workflows/multiomics/split_modalities/utils/labels_ci.config
@@ -0,0 +1,33 @@
+process {
+  withLabel: lowmem { memory = 13.Gb }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midmem { memory = 13.Gb }
+  withLabel: midcpu { cpus = 4 }
+  withLabel: highmem { memory = 13.Gb }
+  withLabel: highcpu { cpus = 4 }
+  withLabel: veryhighmem { memory = 13.Gb }
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+}
+
+env.NUMBA_CACHE_DIR = '/tmp'
+
+trace {
+    enabled = true
+    overwrite = true
+}
+dag {
+  overwrite = true
+}
+
+process.maxForks = 1
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/.config.vsh.yaml
@@ -0,0 +1,466 @@
+name: "celltypist"
+namespace: "annotate"
+version: "3.0.0"
+authors:
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+- name: "Weiwei Schultz"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    organizations:
+    - name: "Janssen R&D US"
+      role: "Associate Director Data Sciences"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data containing log normalized counts to\
+      \ be used for cell type annotation if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata var column in the input data containing gene\
+      \ names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data to train the CellTypist classifiers on. Only\
+      \ required if a pre-trained --model is not provided."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data to be used for cell type annotation\
+      \ if .X is not to be used. Data are expected to be processed in the same way\
+      \ as the --input query dataset."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "The name of the adata obs column in the reference data containing\
+      \ cell type annotations."
+    info: null
+    default:
+    - "cell_ontology_class"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing highly variable genes. By default, do not\
+      \ subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Model arguments"
+  description: "Model arguments."
+  arguments:
+  - type: "file"
+    name: "--model"
+    description: "Pretrained model in pkl format. If not provided, the model will\
+      \ be trained on the reference data and --reference should be provided."
+    info: null
+    example:
+    - "pretrained_model.pkl"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--feature_selection"
+    description: "Whether to perform feature selection."
+    info: null
+    default:
+    - false
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--majority_voting"
+    description: "Whether to refine the predicted labels by running the majority voting\
+      \ classifier after over-clustering."
+    info: null
+    default:
+    - false
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--C"
+    description: "Inverse of regularization strength in logistic regression."
+    info: null
+    default:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_iter"
+    description: "Maximum number of iterations before reaching the minimum of the\
+      \ cost function."
+    info: null
+    default:
+    - 1000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--use_SGD"
+    description: "Whether to use the stochastic gradient descent algorithm."
+    info: null
+    direction: "input"
+  - type: "double"
+    name: "--min_prop"
+    description: "\"For the dominant cell type within a subcluster, the minimum proportion\
+      \ of cells required to \nsupport naming of the subcluster by this cell type.\
+      \ Ignored if majority_voting is set to False. \nSubcluster that fails to pass\
+      \ this proportion threshold will be assigned 'Heterogeneous'.\"\n"
+    info: null
+    default:
+    - 0.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_predictions"
+    description: "In which `.obs` slots to store the predicted information.\n"
+    info: null
+    default:
+    - "celltypist_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predictions.\n"
+    info: null
+    default:
+    - "celltypist_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "cross_check_genes.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Automated cell type annotation tool for scRNA-seq datasets on the basis\
+  \ of logistic regression classifiers optimised by the stochastic gradient descent\
+  \ algorithm."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "annotation_test_data"
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.10-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "scanpy~=1.10.4"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "celltypist==1.6.3"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/annotate/celltypist/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/annotate/celltypist"
+  executable: "target/nextflow/annotate/celltypist/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/cross_check_genes.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/cross_check_genes.py
@@ -0,0 +1,26 @@
+from typing import List
+
+
+def cross_check_genes(
+    query_genes: List[str], reference_genes: List[str], min_gene_overlap: int = 100
+) -> List[str]:
+    """Return the genes shared by the query and reference gene lists.
+
+    Parameters
+    ----------
+    query_genes, reference_genes : List[str]
+        Gene names of the query and of the reference dataset.
+    min_gene_overlap : int
+        Minimum number of shared genes required (default 100).
+
+    Returns
+    -------
+    List[str]
+        Overlapping genes (order is arbitrary: built from a set intersection).
+
+    Raises AssertionError if fewer than `min_gene_overlap` genes are shared.
+    """
+    common_ens_ids = list(set(reference_genes).intersection(set(query_genes)))
+    if len(common_ens_ids) < min_gene_overlap:  # explicit raise: `assert` is stripped under -O
+        raise AssertionError(f"The intersection of genes between the query and reference dataset is too small, expected at least {min_gene_overlap}.")
+    return common_ens_ids
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'annotate/celltypist'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm.'
+  author = 'Jakub Majercik, Weiwei Schultz'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_params.yaml
@@ -0,0 +1,34 @@
+# Inputs
+input: # please fill in - example: "input.h5mu"
+modality: "rna"
+# input_layer: "foo"
+# input_var_gene_names: "foo"
+input_reference_gene_overlap: 100
+
+# Reference
+# reference: "reference.h5mu"
+# reference_layer: "foo"
+reference_obs_target: "cell_ontology_class"
+# reference_var_gene_names: "foo"
+# reference_var_input: "foo"
+
+# Model arguments
+# model: "pretrained_model.pkl"
+feature_selection: false
+majority_voting: false
+C: 1.0
+max_iter: 1000
+use_SGD: false
+min_prop: 0.0
+
+# Outputs
+# output: "$id.$key.output.h5mu"
+output_obs_predictions: "celltypist_pred"
+output_obs_probability: "celltypist_probability"
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
+
+# Arguments
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/nextflow_schema.json
@@ -0,0 +1,205 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "celltypist",
+  "description": "Automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm.",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "inputs": {
+      "title": "Inputs",
+      "type": "object",
+      "description": "Input dataset (query) arguments",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "The input (query) data to be labeled",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
+        },
+        "modality": {
+          "type": "string",
+          "description": "Which modality to process.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
+          "default": "rna"
+        },
+        "input_layer": {
+          "type": "string",
+          "description": "The layer in the input data containing log normalized counts to be used for cell type annotation if .X is not to be used.",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "input_var_gene_names": {
+          "type": "string",
+          "description": "The name of the adata var column in the input data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "input_reference_gene_overlap": {
+          "type": "integer",
+          "description": "The minimum number of genes present in both the reference and query datasets.\n",
+          "help_text": "Type: `integer`, multiple: `False`, default: `100`. ",
+          "default": 100
+        }
+      }
+    },
+    "outputs": {
+      "title": "Outputs",
+      "type": "object",
+      "description": "Output arguments.",
+      "properties": {
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output h5mu file.",
+          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
+          "default": "$id.$key.output.h5mu"
+        },
+        "output_obs_predictions": {
+          "type": "string",
+          "description": "In which `.obs` slots to store the predicted information.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"celltypist_pred\"`. ",
+          "default": "celltypist_pred"
+        },
+        "output_obs_probability": {
+          "type": "string",
+          "description": "In which `.obs` slots to store the probability of the predictions.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"celltypist_probability\"`. ",
+          "default": "celltypist_probability"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "reference": {
+      "title": "Reference",
+      "type": "object",
+      "description": "Arguments related to the reference dataset.",
+      "properties": {
+        "reference": {
+          "type": "string",
+          "format": "path",
+          "description": "The reference data to train the CellTypist classifiers on",
+          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"reference.h5mu\"`. "
+        },
+        "reference_layer": {
+          "type": "string",
+          "description": "The layer in the reference data to be used for cell type annotation if .X is not to be used",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "reference_obs_target": {
+          "type": "string",
+          "description": "The name of the adata obs column in the reference data containing cell type annotations.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"cell_ontology_class\"`. ",
+          "default": "cell_ontology_class"
+        },
+        "reference_var_gene_names": {
+          "type": "string",
+          "description": "The name of the adata var column in the reference data containing gene names; when no gene_name_layer is provided, the var index will be used.\n",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "reference_var_input": {
+          "type": "string",
+          "description": ".var column containing highly variable genes",
+          "help_text": "Type: `string`, multiple: `False`. "
+        }
+      }
+    },
+    "model arguments": {
+      "title": "Model arguments",
+      "type": "object",
+      "description": "Model arguments.",
+      "properties": {
+        "model": {
+          "type": "string",
+          "format": "path",
+          "description": "Pretrained model in pkl format",
+          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"pretrained_model.pkl\"`. "
+        },
+        "feature_selection": {
+          "type": "boolean",
+          "description": "Whether to perform feature selection.",
+          "help_text": "Type: `boolean`, multiple: `False`, default: `false`. ",
+          "default": false
+        },
+        "majority_voting": {
+          "type": "boolean",
+          "description": "Whether to refine the predicted labels by running the majority voting classifier after over-clustering.",
+          "help_text": "Type: `boolean`, multiple: `False`, default: `false`. ",
+          "default": false
+        },
+        "C": {
+          "type": "number",
+          "description": "Inverse of regularization strength in logistic regression.",
+          "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
+          "default": 1.0
+        },
+        "max_iter": {
+          "type": "integer",
+          "description": "Maximum number of iterations before reaching the minimum of the cost function.",
+          "help_text": "Type: `integer`, multiple: `False`, default: `1000`. ",
+          "default": 1000
+        },
+        "use_SGD": {
+          "type": "boolean",
+          "description": "Whether to use the stochastic gradient descent algorithm.",
+          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
+          "default": false
+        },
+        "min_prop": {
+          "type": "number",
+          "description": "For the dominant cell type within a subcluster, the minimum proportion of cells required to support naming of the subcluster by this cell type",
+          "help_text": "Type: `double`, multiple: `False`, default: `0.0`. ",
+          "default": 0.0
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/inputs"
+    },
+    {
+      "$ref": "#/$defs/outputs"
+    },
+    {
+      "$ref": "#/$defs/reference"
+    },
+    {
+      "$ref": "#/$defs/model arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/set_var_index.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Strip trailing numeric suffixes from gene names and use them as the `.var` index.
+
+    A trailing ``.<digits>`` suffix is removed from every name (for example
+    ``GENE.1`` becomes ``GENE``) before the names are assigned as the new index.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object; its `.var` index is overwritten in place.
+    var_name : str | None
+        Name of the column in `adata.var` that contains the gene names. When it
+        is None (or empty), the existing index is sanitized instead.
+
+    Returns
+    -------
+    AnnData
+        The same `adata` object that was passed in, with its `.var` index replaced.
+    """
+    # A truthy `var_name` selects that column as the source of the names;
+    # otherwise the current index is the source.
+    raw_names = adata.var[var_name] if var_name else adata.var.index
+    suffix_pattern = re.compile("\\.[0-9]+$")
+    adata.var.index = [suffix_pattern.sub("", name) for name in raw_names]
+    return adata
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Configure the root logger to write INFO-level records to stdout.
+
+    The function is safe to call more than once: the handler installed by an
+    earlier call is replaced instead of stacked, so every record is emitted
+    only once.
+
+    Returns
+    -------
+    logging.Logger
+        The root logger.
+    """
+    import logging
+    from sys import stdout
+
+    handler_name = "setup_logger_stdout"
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+
+    # Remove the handler left behind by a previous call; without this every
+    # additional call would duplicate each log line.
+    for stale_handler in [h for h in logger.handlers if h.get_name() == handler_name]:
+        logger.removeHandler(stale_handler)
+
+    console_handler = logging.StreamHandler(stdout)
+    console_handler.set_name(handler_name)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/subset_vars.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/celltypist/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Keep only the features flagged in a boolean `.var` column.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object. If the selection column uses the nullable
+        "boolean" dtype, it is converted to plain `bool` on `adata.var` itself.
+    subset_col : str
+        Name of the boolean column in `adata.var` that marks which features to keep
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` restricted to the flagged features
+
+    Raises
+    ------
+    ValueError
+        If `subset_col` is not a column of `adata.var`.
+    AssertionError
+        If the column contains missing values or is not boolean.
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    selector = adata.var[subset_col]
+    if selector.dtype == "boolean":
+        # The nullable dtype can only be cast to plain bool when nothing is missing.
+        n_missing = selector.isna().sum()
+        assert n_missing == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        adata.var[subset_col] = selector.astype("bool")
+        selector = adata.var[subset_col]
+
+    found_dtype = selector.dtype
+    assert found_dtype == "bool", (
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {found_dtype}. Can not subset data."
+    )
+
+    keep_mask = adata.var[subset_col]
+    return adata[:, keep_mask].copy()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/.config.vsh.yaml
@@ -0,0 +1,475 @@
+name: "scanvi"
+namespace: "annotate"
+version: "3.0.0"
+authors:
+- name: "Dorien Roosen"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dorien@data-intuitive.com"
+      github: "dorien-er"
+      linkedin: "dorien-roosen"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+- name: "Weiwei Schultz"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    organizations:
+    - name: "Janssen R&D US"
+      role: "Associate Director Data Sciences"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file. Note that this needs to be the exact same dataset\
+      \ as the --scvi_model was trained on."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "Input layer to use. If None, X is used"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_input"
+    description: ".var column containing highly variable genes that were used to train\
+      \ the scVi model. By default, do not subset genes."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_gene_names"
+    description: ".var column containing gene names. By default, use the index."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_labels"
+    description: ".obs field containing the labels"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--unlabeled_category"
+    description: "Value in the --obs_labels field that indicates unlabeled observations\n"
+    info: null
+    default:
+    - "Unknown"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "scVI Model"
+  arguments:
+  - type: "file"
+    name: "--scvi_model"
+    description: "Pretrained SCVI reference model to initialize the SCANVI model with."
+    info: null
+    example:
+    - "scvi_model.pt"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output_model"
+    description: "Folder where the state of the trained model will be saved to."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_output"
+    description: "In which .obsm slot to store the resulting integrated embedding."
+    info: null
+    default:
+    - "X_scanvi_integrated"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_output_predictions"
+    description: "In which .obs slot to store the predicted labels."
+    info: null
+    default:
+    - "scanvi_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_output_probabilities"
+    description: "In which .obs slot to store the probabilities of the predicted labels."
+    info: null
+    default:
+    - "scanvi_proba"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "scANVI training arguments"
+  arguments:
+  - type: "boolean"
+    name: "--early_stopping"
+    description: "Whether to perform early stopping with respect to the validation\
+      \ set."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--early_stopping_monitor"
+    description: "Metric logged during validation set epoch."
+    info: null
+    default:
+    - "elbo_validation"
+    required: false
+    choices:
+    - "elbo_validation"
+    - "reconstruction_loss_validation"
+    - "kl_local_validation"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--early_stopping_patience"
+    description: "Number of validation epochs with no improvement after which training\
+      \ will be stopped."
+    info: null
+    default:
+    - 45
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--early_stopping_min_delta"
+    description: "Minimum change in the monitored quantity to qualify as an improvement,\
+      \ i.e. an absolute change of less than min_delta, will count as no improvement."
+    info: null
+    default:
+    - 0.0
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_epochs"
+    description: "Number of passes through the dataset, defaults to (20000 / number\
+      \ of cells) * 400 or 400; whichever is smallest."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--reduce_lr_on_plateau"
+    description: "Whether to monitor validation loss and reduce learning rate when\
+      \ validation set `lr_scheduler_metric` plateaus."
+    info: null
+    default:
+    - true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--lr_factor"
+    description: "Factor to reduce learning rate."
+    info: null
+    default:
+    - 0.6
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--lr_patience"
+    description: "Number of epochs with no improvement after which learning rate will\
+      \ be reduced."
+    info: null
+    default:
+    - 30.0
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "scANVI is a semi-supervised model for single-cell transcriptomics\
+  \ data. scANVI is an scVI extension that can leverage the cell type knowledge for\
+  \ a subset of the cells present in the data sets to infer the states of the rest\
+  \ of the cells.\nThis component will instantiate a scANVI model from a pre-trained\
+  \ scVI model, integrate the data and perform label prediction.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "scvi_model"
+- type: "file"
+  path: "TS_Blood_filtered.h5mu"
+- type: "file"
+  path: "pbmc_1k_protein_v3_mms.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "midcpu"
+    - "midmem"
+    - "gpu"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "nvcr.io/nvidia/pytorch:25.05-py3"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "jax[cuda]"
+    - "scvi-tools~=1.3.1"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/annotate/scanvi/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/annotate/scanvi"
+  executable: "target/nextflow/annotate/scanvi/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calulations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    """Write a compressed copy of an h5mu file.
+
+    Every group and dataset of the input file is recreated in the output file.
+    Non-scalar datasets that are not compressed yet are rewritten with
+    `compression`; all other datasets are copied as they are. Finally the
+    MuData header text found at the start of the input file is written to the
+    start of the output file.
+
+    Parameters
+    ----------
+    input_path : str | Path
+        Path of the h5mu file to read.
+    output_path : str | Path
+        Path of the compressed h5mu file to create (opened in "w" mode).
+    compression : "gzip" | "lzf"
+        Compression filter passed to `create_dataset`.
+
+    Raises
+    ------
+    NotImplementedError
+        If the input contains an element that is neither a group nor a dataset.
+    """
+    input_path, output_path = str(input_path), str(output_path)
+    # Size of the block at the start of the file that holds the MuData header.
+    user_block_size = 512
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        node: Union[Group, Dataset],
+    ):
+        if isinstance(node, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(node, new_group)
+        elif isinstance(node, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects don't have a shape defined
+            if not node.compression and node.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=node, compression=compression
+                )
+                copy_attributes(node, new_dataset)
+            else:
+                output_h5.copy(node, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(node)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=user_block_size) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        header_block = input_bytes.read(user_block_size)
+    # The metadata is padded with extra null bytes up until 512 bytes.
+    # partition() keeps the whole block when no null byte is present, whereas
+    # slicing with the -1 returned by find() would drop the last byte.
+    starting_metadata = header_block.partition(b"\x00")[0]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (user_block_size - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    """Copy an h5mu file, write one modality into the copy and optionally compress it.
+
+    Parameters
+    ----------
+    output_file : str | Path
+        Path of the resulting h5mu file.
+    h5mu : str | Path
+        Path of the h5mu file that is copied as the starting point.
+    modality_name : str
+        Name of the modality that is written with `write_h5ad`.
+    modality_data : AnnData
+        Data written for `modality_name`.
+    output_compression : optional
+        Compression passed to `compress_h5mu`. When falsy, the copy is written
+        directly to `output_file` and no compression step runs.
+    """
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+
+    if not output_compression:
+        shutil.copyfile(h5mu, output_file)
+        write_h5ad(filename=output_file, mod=modality_name, data=modality_data)
+        return
+
+    # With compression the data is first staged in a sibling file next to the output.
+    output_file_uncompressed = output_file.with_name(
+        output_file.stem + "_uncompressed.h5mu"
+    )
+    # The copy stays outside the try block: if it fails (for instance because
+    # source and staging path are the same file) nothing must be deleted.
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    try:
+        write_h5ad(
+            filename=output_file_uncompressed, mod=modality_name, data=modality_data
+        )
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+    finally:
+        # Remove the staging file even when writing or compressing fails.
+        output_file_uncompressed.unlink(missing_ok=True)
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'annotate/scanvi'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'scANVI (single-cell ANnotation using Variational Inference) is a semi-supervised model for single-cell transcriptomics data. scANVI is an scVI extension that can leverage the cell type knowledge for a subset of the cells present in the data sets to infer the states of the rest of the cells.\nThis component will instantiate a scANVI model from a pre-trained scVI model, integrate the data and perform label prediction.\n'
+  author = 'Dorien Roosen, Jakub Majercik, Weiwei Schultz'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_params.yaml
@@ -0,0 +1,35 @@
+# Inputs
+input: # please fill in - example: "path/to/file"
+modality: "rna"
+# input_layer: "foo"
+# var_input: "foo"
+# var_gene_names: "foo"
+obs_labels: # please fill in - example: "foo"
+unlabeled_category: "Unknown"
+
+# scVI Model
+scvi_model: # please fill in - example: "scvi_model.pt"
+
+# Outputs
+# output: "$id.$key.output"
+# output_model: "$id.$key.output_model"
+obsm_output: "X_scanvi_integrated"
+obs_output_predictions: "scanvi_pred"
+obs_output_probabilities: "scanvi_proba"
+# output_compression: "gzip"
+
+# scANVI training arguments
+# early_stopping: true
+early_stopping_monitor: "elbo_validation"
+early_stopping_patience: 45
+early_stopping_min_delta: 0.0
+# max_epochs: 123
+reduce_lr_on_plateau: true
+lr_factor: 0.6
+lr_patience: 30.0
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
+
+# Arguments
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/nextflow_schema.json
@@ -0,0 +1,217 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "scanvi",
+  "description": "scANVI (single-cell ANnotation using Variational Inference) is a semi-supervised model for single-cell transcriptomics data. scANVI is an scVI extension that can leverage the cell type knowledge for a subset of the cells present in the data sets to infer the states of the rest of the cells.\nThis component will instantiate a scANVI model from a pre-trained scVI model, integrate the data and perform label prediction.\n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "inputs": {
+      "title": "Inputs",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Input h5mu file",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
+        },
+        "modality": {
+          "type": "string",
+          "description": "Which modality from the input MuData file to process.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
+          "default": "rna"
+        },
+        "input_layer": {
+          "type": "string",
+          "description": "Input layer to use",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "var_input": {
+          "type": "string",
+          "description": ".var column containing highly variable genes that were used to train the scVI model",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "var_gene_names": {
+          "type": "string",
+          "description": ".var column containing gene names",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "obs_labels": {
+          "type": "string",
+          "description": ".obs field containing the labels",
+          "help_text": "Type: `string`, multiple: `False`, required. "
+        },
+        "unlabeled_category": {
+          "type": "string",
+          "description": "Value in the --obs_labels field that indicates unlabeled observations\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"Unknown\"`. ",
+          "default": "Unknown"
+        }
+      }
+    },
+    "outputs": {
+      "title": "Outputs",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output h5mu file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output\"`, direction: `output`. ",
+          "default": "$id.$key.output"
+        },
+        "output_model": {
+          "type": "string",
+          "format": "path",
+          "description": "Folder where the state of the trained model will be saved to.",
+          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_model\"`, direction: `output`. ",
+          "default": "$id.$key.output_model"
+        },
+        "obsm_output": {
+          "type": "string",
+          "description": "In which .obsm slot to store the resulting integrated embedding.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"X_scanvi_integrated\"`. ",
+          "default": "X_scanvi_integrated"
+        },
+        "obs_output_predictions": {
+          "type": "string",
+          "description": "In which .obs slot to store the predicted labels.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"scanvi_pred\"`. ",
+          "default": "scanvi_pred"
+        },
+        "obs_output_probabilities": {
+          "type": "string",
+          "description": "In which .obs slot to store the probabilities of the predicted labels.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"scanvi_proba\"`. ",
+          "default": "scanvi_proba"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "scvi model": {
+      "title": "scVI Model",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "scvi_model": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Pretrained SCVI reference model to initialize the SCANVI model with.",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"scvi_model.pt\"`. "
+        }
+      }
+    },
+    "scanvi training arguments": {
+      "title": "scANVI training arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "early_stopping": {
+          "type": "boolean",
+          "description": "Whether to perform early stopping with respect to the validation set.",
+          "help_text": "Type: `boolean`, multiple: `False`. "
+        },
+        "early_stopping_monitor": {
+          "type": "string",
+          "description": "Metric logged during validation set epoch.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"elbo_validation\"`, choices: ``elbo_validation`, `reconstruction_loss_validation`, `kl_local_validation``. ",
+          "enum": [
+            "elbo_validation",
+            "reconstruction_loss_validation",
+            "kl_local_validation"
+          ],
+          "default": "elbo_validation"
+        },
+        "early_stopping_patience": {
+          "type": "integer",
+          "description": "Number of validation epochs with no improvement after which training will be stopped.",
+          "help_text": "Type: `integer`, multiple: `False`, default: `45`. ",
+          "default": 45
+        },
+        "early_stopping_min_delta": {
+          "type": "number",
+          "description": "Minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change smaller than this value counts as no improvement.",
+          "help_text": "Type: `double`, multiple: `False`, default: `0.0`. ",
+          "default": 0.0
+        },
+        "max_epochs": {
+          "type": "integer",
+          "description": "Number of passes through the dataset, defaults to (20000 / number of cells) * 400 or 400; whichever is smallest.",
+          "help_text": "Type: `integer`, multiple: `False`. "
+        },
+        "reduce_lr_on_plateau": {
+          "type": "boolean",
+          "description": "Whether to monitor validation loss and reduce learning rate when validation set `lr_scheduler_metric` plateaus.",
+          "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
+          "default": true
+        },
+        "lr_factor": {
+          "type": "number",
+          "description": "Factor to reduce learning rate.",
+          "help_text": "Type: `double`, multiple: `False`, default: `0.6`. ",
+          "default": 0.6
+        },
+        "lr_patience": {
+          "type": "number",
+          "description": "Number of epochs with no improvement after which learning rate will be reduced.",
+          "help_text": "Type: `double`, multiple: `False`, default: `30.0`. ",
+          "default": 30.0
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/inputs"
+    },
+    {
+      "$ref": "#/$defs/outputs"
+    },
+    {
+      "$ref": "#/$defs/scvi model"
+    },
+    {
+      "$ref": "#/$defs/scanvi training arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/set_var_index.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Strip a trailing `.<digits>` suffix from each gene name and use the result as the .var index.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object; its `.var` index is overwritten in place.
+    var_name : str | None
+        Name of the column in `adata.var` that contains the gene names. If None (or empty), the existing index is sanitized and reused instead.
+
+    Returns
+    -------
+    AnnData
+        The same `adata` object that was passed in (no copy is made), now carrying the sanitized index.
+    """
+    if var_name:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var[var_name]]
+    else:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var.index]
+    return adata
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # configure the root logger to emit INFO-and-above records on stdout, then return it
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name given, so this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # timestamp, level name left-aligned in 8 columns, message
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE(review): a fresh handler is attached on every call, so calling this twice duplicates each log line
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/subset_vars.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/annotate/scanvi/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Subset an AnnData object to the features flagged in a boolean `.var` column (e.g. highly variable genes)
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object. If `subset_col` has the nullable "boolean" dtype, it is cast to "bool" in `adata.var` in place.
+    subset_col : str
+        Name of the boolean column in `adata.var` that contains the information if features should be used or not
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+    """
+    if subset_col not in adata.var.columns:  # a missing column is reported as ValueError, unlike the dtype checks below
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    if adata.var[subset_col].dtype == "boolean":  # nullable boolean column: must be free of missing values before casting
+        assert adata.var[subset_col].isna().sum() == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")  # side effect: modifies the caller's `adata.var`
+
+    assert adata.var[subset_col].dtype == "bool", (  # any other dtype fails with AssertionError
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+    )
+
+    return adata[:, adata.var[subset_col]].copy()  # boolean mask over the feature axis, materialized as an independent copy
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/.config.vsh.yaml
@@ -0,0 +1,300 @@
+name: "leiden"
+namespace: "cluster"
+version: "3.0.0"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsp_connectivities"
+    description: "In which .obsp slot the neighbor connectivities can be found."
+    info: null
+    default:
+    - "connectivities"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_name"
+    description: "Name of the .obsm key under which to add the cluster labels.\nThe\
+      \ name of the columns in the matrix will correspond to the resolutions.\n"
+    info: null
+    default:
+    - "leiden"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--resolution"
+    description: "A parameter value controlling the coarseness of the clustering.\
+      \ Higher values lead to more clusters.\nMultiple values will result in clustering\
+      \ being performed multiple times.\n"
+    info: null
+    default:
+    - 1.0
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Cluster cells using the [Leiden algorithm] [Traag18] implemented in\
+  \ the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain\
+  \ algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]\
+  \ [Levine15]. \nThis requires having run `neighbors/find_neighbors` or `neighbors/bbknn`\
+  \ first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in\
+  \ large networks, J. Stat. Mech.  \n[Levine15]: Levine et al. (2015), Data-Driven\
+  \ Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with\
+  \ Prognosis, Cell.  \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing\
+  \ well-connected communities arXiv.  \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale\
+  \ single-cell gene expression data analysis, Genome Biology.  \n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "midmem"
+    - "middisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.13-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    - "leidenalg~=0.10.0"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/cluster/leiden/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/cluster/leiden"
+  executable: "target/nextflow/cluster/leiden/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'cluster/leiden'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech.  \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell.  \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv.  \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology.  \n'
+  author = 'Dries De Maeyer'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_params.yaml
@@ -0,0 +1,12 @@
+# Arguments
+input: # please fill in - example: "input.h5mu"
+modality: "rna"
+obsp_connectivities: "connectivities"
+# output: "$id.$key.output.h5mu"
+obsm_name: "leiden"
+resolution: # please fill in - example: [1.0]
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/nextflow_schema.json
@@ -0,0 +1,101 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "leiden",
+  "description": "Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech.  \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell.  \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv.  \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology.  \n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple         parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml                 blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Input file.",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
+        },
+        "modality": {
+          "type": "string",
+          "description": "Which modality from the input MuData file to process.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
+          "default": "rna"
+        },
+        "obsp_connectivities": {
+          "type": "string",
+          "description": "In which .obsp slot the neighbor connectivities can be found.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"connectivities\"`. ",
+          "default": "connectivities"
+        },
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
+          "default": "$id.$key.output.h5mu"
+        },
+        "obsm_name": {
+          "type": "string",
+          "description": "Name of the .obsm key under which to add the cluster labels.\nThe name of the columns in the matrix will correspond to the resolutions.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"leiden\"`. ",
+          "default": "leiden"
+        },
+        "resolution": {
+          "type": "array",
+          "items": {
+            "type": "number"
+          },
+          "description": "A parameter value controlling the coarseness of the clustering",
+          "help_text": "Type: `double`, multiple: `True`, required, default: `[1.0]`. ",
+          "default": [
+            1.0
+          ]
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/cluster/leiden/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/.config.vsh.yaml
@@ -0,0 +1,332 @@
+name: "concatenate_h5mu"
+namespace: "dataflow"
+version: "3.0.0"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Paths to the different samples to be concatenated."
+    info: null
+    example:
+    - "sample_paths"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Only output concatenated objects for the provided modalities. Outputs\
+      \ all modalities by default."
+    info: null
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_id"
+    description: "Names of the different samples that have to be concatenated.  Must\
+      \ be specified when using '--mode move'.\nIn this case, the ids will be used\
+      \ for the column names of the dataframes registering the conflicts.\nIf specified,\
+      \ must be of same length as `--input`.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output location for the concatenated MuData object file.\n"
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_sample_name"
+    description: "Name of the .obs key under which to add the sample names."
+    info: null
+    default:
+    - "sample_id"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--other_axis_mode"
+    description: "How to handle the merging of other axis (var, obs, ...).\n\n  -\
+      \ None: keep no data\n  - same: only keep elements of the matrices which are\
+      \ the same in each of the samples\n  - unique: only keep elements for which\
+      \ there is only 1 possible value (1 value that can occur in multiple samples)\n\
+      \  - first: keep the annotation from the first sample\n  - only: keep elements\
+      \ that show up in only one of the objects (1 unique element in only 1 sample)\n\
+      \  - move: identical to 'same', but moving the conflicting values to .varm or\
+      \ .obsm\n"
+    info: null
+    default:
+    - "move"
+    required: false
+    choices:
+    - "same"
+    - "unique"
+    - "first"
+    - "only"
+    - "concat"
+    - "move"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--uns_merge_mode"
+    description: "How to handle the merging of .uns across modalities\n  - None: keep\
+      \ no data\n  - same: only keep elements of the matrices which are the same in\
+      \ each of the samples\n  - unique: only keep elements for which there is only\
+      \ 1 possible value (1 value that can occur in multiple samples)\n  - first:\
+      \ keep the annotation from the first sample\n  - only: keep elements that show\
+      \ up in only one of the objects (1 unique element in only 1 sample)\n  - make_unique:\
+      \ identical to 'unique', but keys which are not unique are made unique by prefixing\
+      \ them with the sample id.\n"
+    info: null
+    default:
+    - "make_unique"
+    required: false
+    choices:
+    - "same"
+    - "unique"
+    - "first"
+    - "only"
+    - "make_unique"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Concatenate observations from samples in several (uni- and/or multi-modal)\
+  \ MuData files into a single file.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
+- type: "file"
+  path: "human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "midcpu"
+    - "highmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "pandas~=2.1.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/dataflow/concatenate_h5mu/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/dataflow/concatenate_h5mu"
+  executable: "target/nextflow/dataflow/concatenate_h5mu/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'dataflow/concatenate_h5mu'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n'
+  author = 'Dries Schaumont'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_params.yaml
@@ -0,0 +1,13 @@
+# Arguments
+input: # please fill in - example: ["sample_paths"]
+# modality: ["foo"]
+# input_id: ["foo"]
+# output: "$id.$key.output.h5mu"
+obs_sample_name: "sample_id"
+other_axis_mode: "move"
+uns_merge_mode: "make_unique"
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/nextflow_schema.json
@@ -0,0 +1,124 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "concatenate_h5mu",
+  "description": "Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "format": "path",
+          "exists": true,
+          "description": "Paths to the different samples to be concatenated.",
+          "help_text": "Type: `file`, multiple: `True`, required, direction: `input`, example: `[\"sample_paths\"]`. "
+        },
+        "modality": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "Only output concatenated objects for the provided modalities",
+          "help_text": "Type: `string`, multiple: `True`. "
+        },
+        "input_id": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "Names of the different samples that have to be concatenated",
+          "help_text": "Type: `string`, multiple: `True`. "
+        },
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output location for the concatenated MuData object file.\n",
+          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
+          "default": "$id.$key.output.h5mu"
+        },
+        "obs_sample_name": {
+          "type": "string",
+          "description": "Name of the .obs key under which to add the sample names.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"sample_id\"`. ",
+          "default": "sample_id"
+        },
+        "other_axis_mode": {
+          "type": "string",
+          "description": "How to handle the merging of other axis (var, obs, ...).\n\n  - None: keep no data\n  - same: only keep elements of the matrices which are the same in each of the samples\n  - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n  - first: keep the annotation from the first sample\n  - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n  - move: identical to 'same', but moving the conflicting values to .varm or .obsm\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"move\"`, choices: ``same`, `unique`, `first`, `only`, `concat`, `move``. ",
+          "enum": [
+            "same",
+            "unique",
+            "first",
+            "only",
+            "concat",
+            "move"
+          ],
+          "default": "move"
+        },
+        "uns_merge_mode": {
+          "type": "string",
+          "description": "How to handle the merging of .uns across modalities\n  - None: keep no data\n  - same: only keep elements of the matrices which are the same in each of the samples\n  - unique: only keep elements for which there is only 1 possible value (1 value that can occur in multiple samples)\n  - first: keep the annotation from the first sample\n  - only: keep elements that show up in only one of the objects (1 unique element in only 1 sample)\n  - make_unique: identical to 'unique', but keys which are not unique are made unique by prefixing them with the sample id.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"make_unique\"`, choices: ``same`, `unique`, `first`, `only`, `make_unique``. ",
+          "enum": [
+            "same",
+            "unique",
+            "first",
+            "only",
+            "make_unique"
+          ],
+          "default": "make_unique"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/concatenate_h5mu/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/.config.vsh.yaml
@@ -0,0 +1,246 @@
+name: "merge"
+namespace: "dataflow"
+version: "3.0.0"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Paths to the single-modality .h5mu files that need to be combined"
+    info: null
+    default:
+    - "sample_paths"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Path to the output file."
+    info: null
+    default:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "The compression format to be used on the output h5mu object."
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Combine one or more single-modality .h5mu files together into one .h5mu\
+  \ file.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu"
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "singlecpu"
+    - "highmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/dataflow/merge/config.vsh.yml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/dataflow/merge"
+  executable: "target/nextflow/dataflow/merge/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'dataflow/merge'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n'
+  author = 'Dries Schaumont'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_params.yaml
@@ -0,0 +1,8 @@
+# Arguments
+input: # please fill in - example: ["sample_paths"]
+# output: "output.h5mu"
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/nextflow_schema.json
@@ -0,0 +1,78 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "merge",
+  "description": "Combine one or more single-modality .h5mu files together into one .h5mu file.\n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "format": "path",
+          "exists": true,
+          "description": "Paths to the single-modality .h5mu files that need to be combined",
+          "help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`. ",
+          "default": [
+            "sample_paths"
+          ]
+        },
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Path to the output file.",
+          "help_text": "Type: `file`, multiple: `False`, default: `\"output.h5mu\"`, direction: `output`. ",
+          "default": "output.h5mu"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "The compression format to be used on the output h5mu object.",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/merge/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+    # Configure the root logger: INFO level, timestamped records written to stdout.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+    # NOTE: every call attaches one more stdout handler to the root logger.
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/.config.vsh.yaml
@@ -0,0 +1,273 @@
+name: "split_modalities"
+namespace: "dataflow"
+version: "3.0.0"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Robrecht Cannoodt"
+  roles:
+  - "contributor"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Path to a single .h5mu file."
+    info: null
+    default:
+    - "sample_path"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output directory containing multiple h5mu files."
+    info: null
+    example:
+    - "/path/to/output"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output_types"
+    description: "A csv containing the base filename and modality type per output\
+      \ file."
+    info: null
+    example:
+    - "types.csv"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Split the modalities from a single .h5mu multimodal sample into separate\
+  \ .h5mu files. \n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "singlecpu"
+    - "lowmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/dataflow/split_modalities/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/dataflow/split_modalities"
+  executable: "target/nextflow/dataflow/split_modalities/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'dataflow/split_modalities'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Split the modalities from a single .h5mu multimodal sample into separate .h5mu files. \n'
+  author = 'Dries Schaumont, Robrecht Cannoodt'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_params.yaml
@@ -0,0 +1,9 @@
+# Arguments
+input: # please fill in - example: "sample_path"
+# output: "$id.$key.output"
+# output_types: "$id.$key.output_types.csv"
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/nextflow_schema.json
@@ -0,0 +1,80 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "split_modalities",
+  "description": "Split the modalities from a single .h5mu multimodal sample into separate .h5mu files. \n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple         parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml                 blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Path to a single .h5mu file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`. ",
+          "default": "sample_path"
+        },
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output directory containing multiple h5mu files.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output\"`, direction: `output`, example: `\"/path/to/output\"`. ",
+          "default": "$id.$key.output"
+        },
+        "output_types": {
+          "type": "string",
+          "format": "path",
+          "description": "A csv containing the base filename and modality type per output file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output_types.csv\"`, direction: `output`, example: `\"types.csv\"`. ",
+          "default": "$id.$key.output_types.csv"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dataflow/split_modalities/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+    # Configure the root logger: INFO level, timestamped records written to stdout.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+    # NOTE: every call attaches one more stdout handler to the root logger.
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/.config.vsh.yaml
@@ -0,0 +1,318 @@
+name: "pca"
+namespace: "dimred"
+version: "3.0.0"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file"
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer"
+    description: "Use specified layer for expression values instead of the .X object\
+      \ from the modality."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_input"
+    description: "Column name in .var matrix that will be used to select which genes\
+      \ to run the PCA on."
+    info: null
+    example:
+    - "filter_with_hvg"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_output"
+    description: "In which .obsm slot to store the resulting embedding."
+    info: null
+    default:
+    - "X_pca"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--varm_output"
+    description: "In which .varm slot to store the resulting loadings matrix."
+    info: null
+    default:
+    - "pca_loadings"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--uns_output"
+    description: "In which .uns slot to store the resulting variance objects."
+    info: null
+    default:
+    - "pca_variance"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--num_components"
+    description: "Number of principal components to compute. Defaults to 50, or 1\
+      \ - minimum dimension size of selected representation."
+    info: null
+    example:
+    - 25
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--overwrite"
+    description: "Allow overwriting .obsm, .varm and .uns slots."
+    info: null
+    direction: "input"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Computes PCA coordinates, loadings and variance decomposition. Uses\
+  \ the implementation of scikit-learn [Pedregosa11].\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "middisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/dimred/pca/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/dimred/pca"
+  executable: "target/nextflow/dimred/pca/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'dimred/pca'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n'
+  author = 'Dries De Maeyer'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// Detect the temporary directory: take the first of NXF_TEMP, VIASH_TEMP,
+// TEMPDIR and TMPDIR that has a value, fall back to '/tmp', and store the
+// result as an absolute path. It is used by the 'mount_temp' profile.
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_labels.config
@@ -0,0 +1,48 @@
+// Default resources, retry policy and resource labels applied to all processes.
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  // (exit status 137 up to and including 140); any other failure terminates.
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  // Each label scales its base amount with the attempt number. Once the
+  // attempt number reaches maxRetries, and only when resourceLimits.memory
+  // is configured, that limit is requested instead.
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  // Every disk label passes through process.disk when it is set and
+  // evaluates to null otherwise.
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_params.yaml
@@ -0,0 +1,16 @@
+# Arguments
+input: # please fill in - example: "input.h5mu"
+modality: "rna"
+# layer: "foo"
+# var_input: "filter_with_hvg"
+# output: "$id.$key.output.h5mu"
+obsm_output: "X_pca"
+varm_output: "pca_loadings"
+uns_output: "pca_variance"
+# num_components: 25
+overwrite: false
+# output_compression: "gzip"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/nextflow_schema.json
@@ -0,0 +1,117 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "pca",
+  "description": "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple         parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml                 blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Input h5mu file",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
+        },
+        "modality": {
+          "type": "string",
+          "description": "Which modality from the input MuData file to process.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
+          "default": "rna"
+        },
+        "layer": {
+          "type": "string",
+          "description": "Use specified layer for expression values instead of the .X object from the modality.",
+          "help_text": "Type: `string`, multiple: `False`. "
+        },
+        "var_input": {
+          "type": "string",
+          "description": "Column name in .var matrix that will be used to select which genes to run the PCA on.",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"filter_with_hvg\"`. "
+        },
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output h5mu file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
+          "default": "$id.$key.output.h5mu"
+        },
+        "obsm_output": {
+          "type": "string",
+          "description": "In which .obsm slot to store the resulting embedding.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"X_pca\"`. ",
+          "default": "X_pca"
+        },
+        "varm_output": {
+          "type": "string",
+          "description": "In which .varm slot to store the resulting loadings matrix.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"pca_loadings\"`. ",
+          "default": "pca_loadings"
+        },
+        "uns_output": {
+          "type": "string",
+          "description": "In which .uns slot to store the resulting variance objects.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"pca_variance\"`. ",
+          "default": "pca_variance"
+        },
+        "num_components": {
+          "type": "integer",
+          "description": "Number of principal components to compute",
+          "help_text": "Type: `integer`, multiple: `False`, example: `25`. "
+        },
+        "overwrite": {
+          "type": "boolean",
+          "description": "Allow overwriting .obsm, .varm and .uns slots.",
+          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
+          "default": false
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/pca/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/.config.vsh.yaml
@@ -0,0 +1,372 @@
+name: "umap"
+namespace: "dimred"
+version: "3.0.0"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "Input h5mu file"
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--uns_neighbors"
+    description: "The `.uns` neighbors slot as output by the `find_neighbors` component."
+    info: null
+    default:
+    - "neighbors"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_output"
+    description: "The pre/postfix under which to store the UMAP results."
+    info: null
+    default:
+    - "umap"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Arguments"
+  arguments:
+  - type: "double"
+    name: "--min_dist"
+    description: "The effective minimum distance between embedded points. Smaller\
+      \ values will result in a more clustered/clumped embedding where nearby points\
+      \ on the manifold are drawn closer together, while larger values will result\
+      \ in a more even dispersal of points. The value should be set relative to the\
+      \ spread value, which determines the scale at which embedded points will be\
+      \ spread out."
+    info: null
+    default:
+    - 0.5
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--spread"
+    description: "The effective scale of embedded points. In combination with `min_dist`\
+      \ this determines how clustered/clumped the embedded points are."
+    info: null
+    default:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--num_components"
+    description: "The number of dimensions of the embedding."
+    info: null
+    default:
+    - 2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_iter"
+    description: "The number of iterations (epochs) of the optimization. Called `n_epochs`\
+      \ in the original UMAP. Default is set to 500 if neighbors['connectivities'].shape[0]\
+      \ <= 10000, else 200."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--alpha"
+    description: "The initial learning rate for the embedding optimization."
+    info: null
+    default:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--gamma"
+    description: "Weighting applied to negative samples in low dimensional embedding\
+      \ optimization. Values higher than one will result in greater weight being given\
+      \ to negative samples."
+    info: null
+    default:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--negative_sample_rate"
+    description: "The number of negative edge/1-simplex samples to use per positive\
+      \ edge/1-simplex sample in optimizing the low dimensional embedding."
+    info: null
+    default:
+    - 5
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--init_pos"
+    description: "How to initialize the low dimensional embedding. Called `init` in\
+      \ the original UMAP. Options are:\n  \n* Any key from `.obsm`\n* `'paga'`: positions\
+      \ from `paga()`\n* `'spectral'`: use a spectral embedding of the graph\n* `'random'`:\
+      \ assign initial embedding positions at random.\n"
+    info: null
+    default:
+    - "spectral"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning\
+  \ technique suitable for visualizing high-dimensional data. Besides tending to be\
+  \ faster than tSNE, it optimizes the embedding such that it best reflects the topology\
+  \ of the data, which we represent throughout Scanpy using a neighborhood graph.\
+  \ tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in\
+  \ the embedding such that these best match the distribution of distances in the\
+  \ high-dimensional space. We use the implementation of umap-learn [McInnes18]. For\
+  \ a few comparisons of UMAP with tSNE, see this preprint.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "midmem"
+    - "middisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/dimred/umap/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/dimred/umap"
+  executable: "target/nextflow/dimred/umap/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'dimred/umap'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n'
+  author = 'Dries De Maeyer'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_params.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_params.yaml
@@ -0,0 +1,23 @@
+# Inputs
+input: # please fill in - example: "input.h5mu"
+modality: "rna"
+uns_neighbors: "neighbors"
+
+# Outputs
+# output: "$id.$key.output.h5mu"
+obsm_output: "umap"
+# output_compression: "gzip"
+
+# Arguments
+min_dist: 0.5
+spread: 1.0
+num_components: 2
+# max_iter: 123
+alpha: 1.0
+gamma: 1.0
+negative_sample_rate: 5
+init_pos: "spectral"
+
+# Nextflow input-output arguments
+publish_dir: # please fill in - example: "output/"
+# param_list: "my_params.yaml"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_schema.json
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/nextflow_schema.json
@@ -0,0 +1,157 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "umap",
+  "description": "UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n",
+  "type": "object",
+  "$defs": {
+    "Dataset input": {
+      "title": "Dataset input",
+      "type": "object",
+      "description": "Dataset input using nf-tower \"dataset\" or \"data explorer\". Allows for the input of multiple parameter sets to initialise a Nextflow channel.",
+      "properties": {
+        "param_list": {
+          "description": "Dataset input can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. The names of the input fields (e.g. csv columns, json keys) need to be an exact match with the workflow input parameters.",
+          "type": "string",
+          "default": "",
+          "format": "file-path",
+          "mimetype": "text/csv"
+        }
+      }
+    },
+    "inputs": {
+      "title": "Inputs",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "input": {
+          "type": "string",
+          "format": "path",
+          "exists": true,
+          "description": "Input h5mu file",
+          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
+        },
+        "modality": {
+          "type": "string",
+          "description": "Which modality from the input MuData file to process.\n",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
+          "default": "rna"
+        },
+        "uns_neighbors": {
+          "type": "string",
+          "description": "The `.uns` neighbors slot as output by the `find_neighbors` component.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"neighbors\"`. ",
+          "default": "neighbors"
+        }
+      }
+    },
+    "outputs": {
+      "title": "Outputs",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "output": {
+          "type": "string",
+          "format": "path",
+          "description": "Output h5mu file.",
+          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ",
+          "default": "$id.$key.output.h5mu"
+        },
+        "obsm_output": {
+          "type": "string",
+          "description": "The pre/postfix under which to store the UMAP results.",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"umap\"`. ",
+          "default": "umap"
+        },
+        "output_compression": {
+          "type": "string",
+          "description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
+          "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: `gzip`, `lzf`. ",
+          "enum": [
+            "gzip",
+            "lzf"
+          ]
+        }
+      }
+    },
+    "arguments": {
+      "title": "Arguments",
+      "type": "object",
+      "description": "No description",
+      "properties": {
+        "min_dist": {
+          "type": "number",
+          "description": "The effective minimum distance between embedded points",
+          "help_text": "Type: `double`, multiple: `False`, default: `0.5`. ",
+          "default": 0.5
+        },
+        "spread": {
+          "type": "number",
+          "description": "The effective scale of embedded points",
+          "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
+          "default": 1.0
+        },
+        "num_components": {
+          "type": "integer",
+          "description": "The number of dimensions of the embedding.",
+          "help_text": "Type: `integer`, multiple: `False`, default: `2`. ",
+          "default": 2
+        },
+        "max_iter": {
+          "type": "integer",
+          "description": "The number of iterations (epochs) of the optimization",
+          "help_text": "Type: `integer`, multiple: `False`. "
+        },
+        "alpha": {
+          "type": "number",
+          "description": "The initial learning rate for the embedding optimization.",
+          "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
+          "default": 1.0
+        },
+        "gamma": {
+          "type": "number",
+          "description": "Weighting applied to negative samples in low dimensional embedding optimization",
+          "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
+          "default": 1.0
+        },
+        "negative_sample_rate": {
+          "type": "integer",
+          "description": "The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.",
+          "help_text": "Type: `integer`, multiple: `False`, default: `5`. ",
+          "default": 5
+        },
+        "init_pos": {
+          "type": "string",
+          "description": "How to initialize the low dimensional embedding",
+          "help_text": "Type: `string`, multiple: `False`, default: `\"spectral\"`. ",
+          "default": "spectral"
+        }
+      }
+    },
+    "nextflow input-output arguments": {
+      "title": "Nextflow input-output arguments",
+      "type": "object",
+      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+      "properties": {
+        "publish_dir": {
+          "type": "string",
+          "description": "Path to an output directory.",
+          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
+        }
+      }
+    }
+  },
+  "allOf": [
+    {
+      "$ref": "#/$defs/inputs"
+    },
+    {
+      "$ref": "#/$defs/outputs"
+    },
+    {
+      "$ref": "#/$defs/arguments"
+    },
+    {
+      "$ref": "#/$defs/nextflow input-output arguments"
+    }
+  ]
+}
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/setup_logger.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/dimred/umap/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/.config.vsh.yaml
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/.config.vsh.yaml
@@ -0,0 +1,423 @@
+name: "highly_variable_features_scanpy"
+namespace: "feature_annotation"
+version: "3.0.0"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "contributor"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+- name: "Robrecht Cannoodt"
+  roles:
+  - "maintainer"
+  - "contributor"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "Input h5mu file"
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer"
+    description: "use adata.layers[layer] for expression values instead of adata.X."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_input"
+    description: "If specified, use boolean array in adata.var[var_input] to calculate\
+      \ hvg on subset of vars.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_name_filter"
+    description: "In which .var slot to store a boolean array corresponding to which\
+      \ observations should be filtered out."
+    info: null
+    default:
+    - "filter_with_hvg"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--varm_name"
+    description: "In which .varm slot to store additional metadata."
+    info: null
+    default:
+    - "hvg"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--flavor"
+    description: "Choose the flavor for identifying highly variable features. For\
+      \ the dispersion based methods\nin their default workflows, Seurat passes the\
+      \ cutoffs whereas Cell Ranger passes n_top_features.\n"
+    info: null
+    default:
+    - "seurat"
+    required: false
+    choices:
+    - "seurat"
+    - "cell_ranger"
+    - "seurat_v3"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--n_top_features"
+    description: "Number of highly-variable features to keep. Mandatory if flavor='seurat_v3'."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--min_mean"
+    description: "If n_top_features is defined, this and all other cutoffs for the\
+      \ means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'."
+    info: null
+    default:
+    - 0.0125
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--max_mean"
+    description: "If n_top_features is defined, this and all other cutoffs for the\
+      \ means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'."
+    info: null
+    default:
+    - 3.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--min_disp"
+    description: "If n_top_features is defined, this and all other cutoffs for the\
+      \ means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'."
+    info: null
+    default:
+    - 0.5
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--max_disp"
+    description: "If n_top_features is defined, this and all other cutoffs for the\
+      \ means and the normalized dispersions are ignored. Ignored if flavor='seurat_v3'.\
+      \ Default is +inf."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--span"
+    description: "The fraction of the data (cells) used when estimating the variance\
+      \ in the loess model fit if flavor='seurat_v3'."
+    info: null
+    default:
+    - 0.3
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--n_bins"
+    description: "Number of bins for binning the mean feature expression. Normalization\
+      \ is done with respect to each bin. If just a single feature falls into a bin,\
+      \ the normalized dispersion is artificially set to 1."
+    info: null
+    default:
+    - 20
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_batch_key"
+    description: "If specified, highly-variable features are selected within each\
+      \ batch separately and merged. This simple \nprocess avoids the selection of\
+      \ batch-specific features and acts as a lightweight batch correction method.\
+      \ \nFor all flavors, features are first sorted by how many batches they are\
+      \ a HVG. For dispersion-based flavors \nties are broken by normalized dispersion.\
+      \ If flavor = 'seurat_v3', ties are broken by the median (across\nbatches) rank\
+      \ based on within-batch normalized variance.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Annotate highly variable features [Satija15] [Zheng17] [Stuart19].\n\
+  \nExpects logarithmized data, except when flavor='seurat_v3' in which count data\
+  \ is expected.\n\nDepending on flavor, this reproduces the R-implementations of\
+  \ Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the\
+  \ dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion\
+  \ is obtained by scaling with the mean and standard deviation of the dispersions\
+  \ for features falling into a given bin for mean expression of features. This means\
+  \ that for each bin of mean expression, highly variable features are selected.\n\
+  \nFor [Stuart19], a normalized variance for each feature is computed. First, the\
+  \ data are standardized (i.e., z-score normalization per feature) with a regularized\
+  \ standard deviation. Next, the normalized variance is computed as the variance\
+  \ of each feature after the transformation. Features are ranked by the normalized\
+  \ variance.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "singlecpu"
+    - "lowmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12"
+  target_tag: "3.0.0"
+  namespace_separator: "/"
+  setup:
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    - "scikit-misc"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+build_info:
+  config: "src/feature_annotation/highly_variable_features_scanpy/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker"
+  output: "target/nextflow/feature_annotation/highly_variable_features_scanpy"
+  executable: "target/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "706b5ce24d313dcf947b7d9fe929630f1ad204e7"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "2.1.0-2-g706b5ce24d3"
+package_config:
+  name: "openpipeline"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'"
+  - ".version := \"3.0.0\""
+  - ".engines[.type == 'docker'].target_tag := '3.0.0'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "openpipelines-bio"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/compress_h5mu.py
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/nextflow.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/nextflow.config
@@ -0,0 +1,126 @@
+// Pipeline metadata for this component: its name, entry script, required
+// Nextflow version, component version, description and authors.
+manifest {
+  name = 'feature_annotation/highly_variable_features_scanpy'
+  mainScript = 'main.nf'
+  // NOTE(review): per the Nextflow docs a leading '!' turns a version
+  // mismatch into a hard error instead of a warning - confirm.
+  nextflowVersion = '!>=20.12.1-edge'
+  version = '3.0.0'
+  description = 'Annotate highly variable features [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for features falling into a given bin for mean expression of features. This means that for each bin of mean expression, highly variable features are selected.\n\nFor [Stuart19], a normalized variance for each feature is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each feature after the transformation. Features are ranked by the normalized variance.\n'
+  author = 'Dries De Maeyer, Robrecht Cannoodt'
+}
+
+// Default container image for processes in this config.
+process.container = 'nextflow/bash:latest'
+
+// Detect the temporary directory: use the first of NXF_TEMP, VIASH_TEMP,
+// TEMPDIR, TMPDIR that has a value (checked in that order), falling back to
+// '/tmp'. The result is stored as an absolute path and is used by the
+// 'mount_temp' profile below.
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+// Selectable configuration profiles. Each container-engine profile enables
+// exactly one engine and explicitly disables the other four.
+profiles {
+  // Turn off publishDir for every process (the '.*' selector matches all names).
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  // Point the temp setting of docker, podman and charliecloud at the
+  // detected tempDir.
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  // Run with Docker only.
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  // Run with Singularity only; autoMounts is switched on as well.
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  // Run with Podman only.
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  // Run with Shifter only.
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  // Run with Charliecloud only.
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+// Fixed resource labels: a process carrying one of these labels gets the
+// corresponding memory (in bytes) or CPU count.
+process{
+  // Decimal memory sizes (1 gb = 10^9 bytes, 1 tb = 10^12 bytes).
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  // Binary memory sizes (1 gib = 2^30 bytes, 1 tib = 2^40 bytes).
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  // CPU counts.
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/nextflow_labels.config
+++ b/target/dependencies/github/openpipelines-bio/openpipeline/3.0.0/nextflow/feature_annotation/highly_variable_features_scanpy/nextflow_labels.config
@@ -0,0 +1,48 @@
+// Default resources, retry policy and the symbolic cpu/memory/disk labels.
+process {
+  // Default resources for components that hardly do any processing
+  // (memory grows linearly with the attempt number).
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  // (137 up to and including 140); any other exit status terminates the run.
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  // Each label requests <base> * task.attempt. Once the attempt number has
+  // reached maxRetries and a resourceLimits.memory value is configured, the
+  // configured limit is requested instead.
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  // Every disk label evaluates to process.disk when that is set, else null.
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/Show More
+++ b/Show More