Build branch main with version main (173327cc)

Build pipeline: vsh-ci-build-template-k4qzr Source commit: 173327cc56 Source message: Cellranger multi conversion: fix combined AB + CB probe experiments (#1062)
2025-08-22 08:50:18 +00:00
commit cd5554d22f
2226 changed files with 1154442 additions and 0 deletions
--- a/target/executable/annotate/celltypist/.config.vsh.yaml
+++ b/target/executable/annotate/celltypist/.config.vsh.yaml
@@ -0,0 +1,471 @@
+name: "celltypist"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+- name: "Weiwei Schultz"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    organizations:
+    - name: "Janssen R&D US"
+      role: "Associate Director Data Sciences"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data containing log normalized counts to\
+      \ be used for cell type annotation if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata var column in the input data containing gene\
+      \ names; when not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data to train the CellTypist classifiers on. Only\
+      \ required if a pre-trained --model is not provided."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data to be used for cell type annotation\
+      \ if .X is not to be used. Data are expected to be processed in the same way\
+      \ as the --input query dataset."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "The name of the adata obs column in the reference data containing\
+      \ cell type annotations."
+    info: null
+    default:
+    - "cell_ontology_class"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing highly variable genes. By default, do not\
+      \ subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Model arguments"
+  description: "Model arguments."
+  arguments:
+  - type: "file"
+    name: "--model"
+    description: "Pretrained model in pkl format. If not provided, the model will\
+      \ be trained on the reference data and --reference should be provided."
+    info: null
+    example:
+    - "pretrained_model.pkl"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--feature_selection"
+    description: "Whether to perform feature selection."
+    info: null
+    default:
+    - false
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--majority_voting"
+    description: "Whether to refine the predicted labels by running the majority voting\
+      \ classifier after over-clustering."
+    info: null
+    default:
+    - false
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--C"
+    description: "Inverse of regularization strength in logistic regression."
+    info: null
+    default:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_iter"
+    description: "Maximum number of iterations before reaching the minimum of the\
+      \ cost function."
+    info: null
+    default:
+    - 1000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--use_SGD"
+    description: "Whether to use the stochastic gradient descent algorithm."
+    info: null
+    direction: "input"
+  - type: "double"
+    name: "--min_prop"
+    description: "For the dominant cell type within a subcluster, the minimum proportion\
+      \ of cells required to \nsupport naming of the subcluster by this cell type.\
+      \ Ignored if majority_voting is set to False. \nSubclusters that fail to pass\
+      \ this proportion threshold will be assigned 'Heterogeneous'.\n"
+    info: null
+    default:
+    - 0.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_predictions"
+    description: "In which `.obs` slots to store the predicted information.\n"
+    info: null
+    default:
+    - "celltypist_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predictions.\n"
+    info: null
+    default:
+    - "celltypist_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "cross_check_genes.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Automated cell type annotation tool for scRNA-seq datasets on the basis\
+  \ of logistic regression classifiers optimised by the stochastic gradient descent\
+  \ algorithm."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "annotation_test_data"
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.10-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "scanpy~=1.10.4"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "celltypist==1.6.3"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/celltypist/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/celltypist"
+  executable: "target/executable/annotate/celltypist/celltypist"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/celltypist/celltypist
+++ b/target/executable/annotate/celltypist/celltypist
--- a/target/executable/annotate/celltypist/cross_check_genes.py
+++ b/target/executable/annotate/celltypist/cross_check_genes.py
@@ -0,0 +1,26 @@
+from typing import List
+
+
+def cross_check_genes(
+    query_genes: List[str], reference_genes: List[str], min_gene_overlap: int = 100
+) -> List[str]:
+    """Cross check the overlap between two lists of genes
+
+    Parameters
+    ----------
+    query_genes : List[str]
+        List of gene names
+    reference_genes : List[str]
+       List of gene names
+
+    Returns
+    -------
+    List[str]
+        List of overlapping genes
+    """
+    common_ens_ids = list(set(reference_genes).intersection(set(query_genes)))
+    assert len(common_ens_ids) >= min_gene_overlap, (
+        f"The intersection of genes between the query and reference dataset is too small, expected at least {min_gene_overlap}."
+    )
+
+    return common_ens_ids
--- a/target/executable/annotate/celltypist/nextflow_labels.config
+++ b/target/executable/annotate/celltypist/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing (memory grows with each attempt)
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry only when the exit status is in 137..140 (treated here as memory-related failures); any other failure terminates
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: the request scales with task.attempt; once task.attempt reaches maxRetries and resourceLimits.memory is set, that limit is requested instead
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes through the process-wide `disk` setting when one is set, otherwise null
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/celltypist/set_var_index.py
+++ b/target/executable/annotate/celltypist/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Sanitize gene names and set the index of the .var DataFrame.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    var_name : str | None
+        Name of the column in `adata.var` that contains the gene names, if None, the existing index will be sanitized but not replaced.
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with sanitized and replaced index
+    """
+    if var_name:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var[var_name]]
+    else:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var.index]
+    return adata
--- a/target/executable/annotate/celltypist/setup_logger.py
+++ b/target/executable/annotate/celltypist/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/annotate/celltypist/subset_vars.py
+++ b/target/executable/annotate/celltypist/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Subset AnnData object on highly variable genes
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    subset_col : str
+        Name of the boolean column in `adata.var` that contains the information if features should be used or not
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    if adata.var[subset_col].dtype == "boolean":
+        assert adata.var[subset_col].isna().sum() == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+
+    assert adata.var[subset_col].dtype == "bool", (
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+    )
+
+    return adata[:, adata.var[subset_col]].copy()
--- a/target/executable/annotate/onclass/.config.vsh.yaml
+++ b/target/executable/annotate/onclass/.config.vsh.yaml
@@ -0,0 +1,442 @@
+name: "onclass"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data to be used for cell type annotation\
+      \ if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata var column in the input data containing gene\
+      \ names; when not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Ontology"
+  description: "Ontology input files"
+  arguments:
+  - type: "file"
+    name: "--cl_nlp_emb_file"
+    description: "The .nlp.emb file with the cell type embeddings."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--cl_ontology_file"
+    description: "The .ontology file with the cell type ontology."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--cl_obo_file"
+    description: "The .obo file with the cell type ontology."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data to train the OnClass model on. Only\
+      \ required if a pre-trained --model is not provided."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data to be used for cell type annotation\
+      \ if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "The name of the adata obs column in the reference data containing\
+      \ cell type annotations."
+    info: null
+    example:
+    - "cell_ontology_class"
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing highly variable genes. By default, do not\
+      \ subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--unknown_celltype"
+    description: "Label for unknown cell types.\n"
+    info: null
+    default:
+    - "Unknown"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_predictions"
+    description: "In which `.obs` slots to store the predicted information.\n"
+    info: null
+    default:
+    - "onclass_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predictions.\n"
+    info: null
+    default:
+    - "onclass_prob"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Model arguments"
+  description: "Model arguments"
+  arguments:
+  - type: "string"
+    name: "--model"
+    description: "Pretrained model path without a file extension. If not provided,\
+      \ the model will be trained \non the reference data and --reference should be\
+      \ provided. The path namespace should contain:\n  - a .npz or .pkl file\n  -\
+      \ a .data file\n  - a .meta file\n  - a .index file\ne.g. /path/to/model/pretrained_model_target1\
+      \ as saved by OnClass.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_iter"
+    description: "Maximum number of iterations for training the model."
+    info: null
+    default:
+    - 30
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "cross_check_genes.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "OnClass is a python package for single-cell cell type annotation. It\
+  \ uses the Cell Ontology to capture the cell type similarity. \nThese similarities\
+  \ enable OnClass to annotate cell types that are never seen in the training data.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "annotation_test_data"
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "OnClass~=1.3"
+    - "tensorflow"
+    - "obonet"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/onclass/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/onclass"
+  executable: "target/executable/annotate/onclass/onclass"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/onclass/cross_check_genes.py
+++ b/target/executable/annotate/onclass/cross_check_genes.py
@@ -0,0 +1,26 @@
+from typing import List
+
+
+def cross_check_genes(
+    query_genes: List[str], reference_genes: List[str], min_gene_overlap: int = 100
+) -> List[str]:
+    """Cross check the overlap between two lists of genes
+
+    Parameters
+    ----------
+    query_genes : List[str]
+        List of gene names of the query dataset
+    reference_genes : List[str]
+        List of gene names of the reference dataset
+    min_gene_overlap : int
+        Minimum number of genes that must be present in both lists, by default 100
+
+    Returns
+    -------
+    List[str]
+        List of overlapping genes. Duplicates are removed and, because the list
+        is built from a set intersection, its order is not guaranteed.
+
+    Raises
+    ------
+    AssertionError
+        If fewer than `min_gene_overlap` genes are shared between the two lists.
+    """
+    # Converting both inputs to sets deduplicates them before intersecting.
+    common_ens_ids = list(set(reference_genes).intersection(set(query_genes)))
+    # NOTE: `assert` statements are skipped when Python runs with -O,
+    # in which case this minimum-overlap check is not performed.
+    assert len(common_ens_ids) >= min_gene_overlap, (
+        f"The intersection of genes between the query and reference dataset is too small, expected at least {min_gene_overlap}."
+    )
+
+    return common_ens_ids
--- a/target/executable/annotate/onclass/nextflow_labels.config
+++ b/target/executable/annotate/onclass/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/onclass/onclass
+++ b/target/executable/annotate/onclass/onclass
--- a/target/executable/annotate/onclass/set_var_index.py
+++ b/target/executable/annotate/onclass/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Sanitize gene names and set the index of the .var DataFrame.
+
+    Sanitizing removes a trailing `.<digits>` suffix from every name
+    (presumably a gene version suffix; confirm against the callers).
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    var_name : str | None
+        Name of the column in `adata.var` that contains the gene names, if None, the existing index will be sanitized but not replaced.
+
+    Returns
+    -------
+    AnnData
+        The same `adata` object that was passed in. Its `.var` index is
+        overwritten in place; no copy is made.
+    """
+    # NOTE: the truthiness test means an empty string is handled like None.
+    if var_name:
+        # Use the sanitized values of the requested column as the new index.
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var[var_name]]
+    else:
+        # Keep the existing index values, only stripping the numeric suffix.
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var.index]
+    return adata
--- a/target/executable/annotate/onclass/setup_logger.py
+++ b/target/executable/annotate/onclass/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Configure the root logger to write messages of level INFO and above to stdout.
+
+    Returns
+    -------
+    logging.Logger
+        The root logger, with a newly attached stdout stream handler.
+    """
+    import logging
+    from sys import stdout
+
+    # Calling getLogger() without a name returns the root logger.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    # Format: timestamp, level name left-aligned in 8 characters, message.
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    # NOTE: a new handler is added on every call, so calling this function
+    # more than once in the same process emits each record multiple times.
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/annotate/onclass/subset_vars.py
+++ b/target/executable/annotate/onclass/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Subset AnnData object on the features flagged in a boolean .var column
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    subset_col : str
+        Name of the boolean column in `adata.var` that contains the information if features should be used or not
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+
+    Raises
+    ------
+    ValueError
+        If `subset_col` is not a column of `adata.var`.
+    AssertionError
+        If a column of dtype "boolean" contains missing values, or if the
+        column dtype is not `bool` after the optional conversion.
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    # The "boolean" dtype (presumably pandas' nullable boolean extension type)
+    # may hold missing values, which cannot be used as a selection mask.
+    if adata.var[subset_col].dtype == "boolean":
+        assert adata.var[subset_col].isna().sum() == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        # NOTE: this writes the converted column back into the caller's
+        # `adata.var`, so the input object is modified even though a copy is returned.
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+
+    # NOTE: `assert` statements are skipped when Python runs with -O.
+    assert adata.var[subset_col].dtype == "bool", (
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+    )
+
+    # Boolean mask on the variable axis; .copy() materializes the selection.
+    return adata[:, adata.var[subset_col]].copy()
--- a/target/executable/annotate/popv/.config.vsh.yaml
+++ b/target/executable/annotate/popv/.config.vsh.yaml
@@ -0,0 +1,408 @@
+name: "popv"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Matthias Beyens"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      github: "MatthiasBeyens"
+      orcid: "0000-0003-3304-0706"
+      email: "matthias.beyens@gmail.com"
+      linkedin: "mbeyens"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+- name: "Robrecht Cannoodt"
+  roles:
+  - "author"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Inputs"
+  description: "Arguments related to the input (aka query) dataset."
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "Which layer to use. If no value is provided, the counts are assumed\
+      \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_obs_batch"
+    description: "Key in obs field of input adata for batch information. If no value\
+      \ is provided, batch label is assumed to be unknown."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_subset"
+    description: "Subset the input object with this column."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_obs_label"
+    description: "Key in obs field of input adata for label information. This is only\
+      \ used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\
+      `."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--unknown_celltype_label"
+    description: "If `input_obs_label` is specified, cells with this value will be\
+      \ treated as unknown and will be predicted by the model."
+    info: null
+    default:
+    - "unknown"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "User-provided reference tissue. The data that will be used as reference\
+      \ to call cell types."
+    info: null
+    example:
+    - "TS_Bladder_filtered.h5ad"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "Which layer to use. If no value is provided, the counts are assumed\
+      \ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_label"
+    description: "Key in obs field of reference AnnData with cell-type information."
+    info: null
+    default:
+    - "cell_ontology_class"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_batch"
+    description: "Key in obs field of reference adata for batch information."
+    info: null
+    default:
+    - "donor_assay"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Arguments"
+  description: "Other arguments."
+  arguments:
+  - type: "string"
+    name: "--methods"
+    description: "Methods to call cell types. By default, runs knn_on_scvi and\
+      \ scanvi."
+    info: null
+    example:
+    - "knn_on_scvi"
+    - "scanvi"
+    required: true
+    choices:
+    - "celltypist"
+    - "knn_on_bbknn"
+    - "knn_on_scanorama"
+    - "knn_on_scvi"
+    - "onclass"
+    - "rf"
+    - "scanvi"
+    - "svm"
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Performs popular major vote cell typing on single cell sequence data\
+  \ using multiple algorithms. Note that this is a one-shot version of PopV."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "annotation_test_data"
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highmem"
+    - "highcpu"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    env:
+    - "CFLAGS=\"-mno-avx512f -mno-avx2\""
+    - "CPPFLAGS=\"-mno-avx512f -mno-avx2\""
+  - type: "apt"
+    packages:
+    - "procps"
+    - "git"
+    - "build-essential"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "popv~=0.4.2"
+    - "numpy<2"
+    - "setuptools"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  - type: "docker"
+    run:
+    - "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git\n"
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/popv/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/popv"
+  executable: "target/executable/annotate/popv/popv"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/popv/nextflow_labels.config
+++ b/target/executable/annotate/popv/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/popv/popv
+++ b/target/executable/annotate/popv/popv
--- a/target/executable/annotate/popv/setup_logger.py
+++ b/target/executable/annotate/popv/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Configure the root logger to write messages of level INFO and above to stdout.
+
+    Returns
+    -------
+    logging.Logger
+        The root logger, with a newly attached stdout stream handler.
+    """
+    import logging
+    from sys import stdout
+
+    # Calling getLogger() without a name returns the root logger.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    # Format: timestamp, level name left-aligned in 8 characters, message.
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    # NOTE: a new handler is added on every call, so calling this function
+    # more than once in the same process emits each record multiple times.
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/annotate/random_forest_annotation/.config.vsh.yaml
+++ b/target/executable/annotate/random_forest_annotation/.config.vsh.yaml
@@ -0,0 +1,457 @@
+name: "random_forest_annotation"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data to be used for cell type annotation\
+      \ if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata var column in the input data containing gene\
+      \ names; when this argument is not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data to train the random forest classifier on. Only\
+      \ required if a pre-trained --model is not provided."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data to be used for cell type annotation\
+      \ if .X is not to be used. Data are expected to be processed in the same way\
+      \ as the --input query dataset."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "Key in obs field of reference modality with cell-type information."
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when this argument is not provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing highly variable genes. By default, do not\
+      \ subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_predictions"
+    description: "In which `.obs` slots to store the predicted information.\n"
+    info: null
+    default:
+    - "random_forest_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predictions.\n"
+    info: null
+    default:
+    - "random_forest_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Model arguments"
+  description: "Model arguments."
+  arguments:
+  - type: "file"
+    name: "--model"
+    description: "Pretrained model in pkl format. If not provided, the model will\
+      \ be trained on the reference data and --reference should be provided."
+    info: null
+    example:
+    - "pretrained_model.pkl"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--n_estimators"
+    description: "Number of trees in the random forest."
+    info: null
+    default:
+    - 100
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_depth"
+    description: "Maximum depth of the trees in the random forest. \nIf not provided,\
+      \ the nodes are expanded until all leaves only contain a single sample.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--criterion"
+    description: "The function to measure the quality of a split."
+    info: null
+    default:
+    - "gini"
+    required: false
+    choices:
+    - "gini"
+    - "entropy"
+    - "log_loss"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--class_weight"
+    description: "Weights associated with classes.\nThe `balanced` mode uses the values\
+      \ of y to automatically adjust weights inversely proportional to class frequencies\
+      \ in the input data.\nThe `balanced_subsample` mode is the same as `balanced`\
+      \ except that weights are computed based on the bootstrap sample for every tree\
+      \ grown.\nThe `uniform` mode gives all classes a weight of one.\n"
+    info: null
+    default:
+    - "balanced_subsample"
+    required: false
+    choices:
+    - "balanced"
+    - "balanced_subsample"
+    - "uniform"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--max_features"
+    description: "The number of features to consider when looking for the best split.\
+      \ The value can either be a positive integer or one of `sqrt`, `log2` or `all`.\n\
+      If integer: consider max_features features at each split.\nIf `sqrt`: max_features\
+      \ is the square root of all input features.\nIf `log2`: max_features is the log2\
+      \ of all input features.\nIf `all`: max features equals all input features.\n"
+    info: null
+    default:
+    - "200"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "cross_check_genes.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Automated cell type annotation tool for scRNA-seq datasets on the basis\
+  \ of random forest."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "TS_Blood_filtered.h5mu"
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "scikit-learn==1.4.2"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/random_forest_annotation/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/random_forest_annotation"
+  executable: "target/executable/annotate/random_forest_annotation/random_forest_annotation"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/random_forest_annotation/cross_check_genes.py
+++ b/target/executable/annotate/random_forest_annotation/cross_check_genes.py
@@ -0,0 +1,26 @@
+from typing import List
+
+
+def cross_check_genes(
+    query_genes: List[str], reference_genes: List[str], min_gene_overlap: int = 100
+) -> List[str]:
+    """Return the genes shared by the query and the reference gene lists.
+
+    Parameters
+    ----------
+    query_genes, reference_genes : List[str]
+        Gene names of the query and of the reference dataset.
+    min_gene_overlap : int
+        Minimum number of shared genes; an AssertionError is raised when fewer are found.
+
+    Returns
+    -------
+    List[str]
+        Unique shared genes, ordered by first appearance in `query_genes` (reproducible).
+    """
+    reference_lookup = set(reference_genes)
+    common_ens_ids = [g for g in dict.fromkeys(query_genes) if g in reference_lookup]
+    if len(common_ens_ids) < min_gene_overlap:  # explicit raise: survives `python -O`
+        raise AssertionError(f"The intersection of genes between the query and reference dataset is too small, expected at least {min_gene_overlap}.")
+
+    return common_ens_ids
--- a/target/executable/annotate/random_forest_annotation/nextflow_labels.config
+++ b/target/executable/annotate/random_forest_annotation/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/random_forest_annotation/random_forest_annotation
+++ b/target/executable/annotate/random_forest_annotation/random_forest_annotation
--- a/target/executable/annotate/random_forest_annotation/set_var_index.py
+++ b/target/executable/annotate/random_forest_annotation/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Strip trailing numeric suffixes from gene names and use them as the `.var` index.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object; its `.var` index is overwritten in place.
+    var_name : str | None
+        Column of `adata.var` holding the gene names. When None (or empty), the current index is used as the source.
+
+    Returns
+    -------
+    AnnData
+        The same `adata` object that was passed in (not a copy).
+    """
+    # A trailing ".<digits>" is removed from every name, e.g. "GENE.2" -> "GENE".
+    raw_names = adata.var[var_name] if var_name else adata.var.index
+    adata.var.index = [re.sub("\\.[0-9]+$", "", name) for name in raw_names]
+
+    return adata
--- a/target/executable/annotate/random_forest_annotation/setup_logger.py
+++ b/target/executable/annotate/random_forest_annotation/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Attach an INFO-level stdout handler to the root logger and return that logger."""
+    import logging
+    import sys
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)-8s %(message)s"))
+    # getLogger() without a name is the root logger; every call adds one more handler.
+    root = logging.getLogger()
+    root.setLevel(logging.INFO)
+    root.addHandler(handler)
+    return root
--- a/target/executable/annotate/random_forest_annotation/subset_vars.py
+++ b/target/executable/annotate/random_forest_annotation/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Keep only the features flagged True in a boolean `.var` column.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object; a nullable `boolean` column is cast to `bool` in place.
+    subset_col : str
+        Name of the boolean column in `adata.var` that marks the features to keep.
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+
+    Raises
+    ------
+    ValueError
+        If `subset_col` is not a column of `adata.var`.
+    AssertionError
+        If the column holds NaN or is not `bool`; raised explicitly so `python -O` keeps the checks.
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available.")
+    if adata.var[subset_col].dtype == "boolean":
+        if adata.var[subset_col].isna().sum() != 0:
+            raise AssertionError(f"The .var column `{subset_col}` contains NaN values. Can not subset data.")
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+    if not adata.var[subset_col].dtype == "bool":
+        raise AssertionError(f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data.")
+    return adata[:, adata.var[subset_col]].copy()
--- a/target/executable/annotate/scanvi/.config.vsh.yaml
+++ b/target/executable/annotate/scanvi/.config.vsh.yaml
@@ -0,0 +1,480 @@
+name: "scanvi"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Dorien Roosen"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dorien@data-intuitive.com"
+      github: "dorien-er"
+      linkedin: "dorien-roosen"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+- name: "Weiwei Schultz"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    organizations:
+    - name: "Janssen R&D US"
+      role: "Associate Director Data Sciences"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file. Note that this needs to be the exact same dataset\
+      \ as the --scvi_model was trained on."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "Input layer to use. If None, X is used"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_input"
+    description: ".var column containing highly variable genes that were used to train\
+      \ the scVi model. By default, do not subset genes."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_gene_names"
+    description: ".var column containing gene names. By default, use the index."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_labels"
+    description: ".obs field containing the labels"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--unlabeled_category"
+    description: "Value in the --obs_labels field that indicates unlabeled observations\n"
+    info: null
+    default:
+    - "Unknown"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "scVI Model"
+  arguments:
+  - type: "file"
+    name: "--scvi_model"
+    description: "Pretrained SCVI reference model to initialize the SCANVI model with."
+    info: null
+    example:
+    - "scvi_model.pt"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output_model"
+    description: "Folder where the state of the trained model will be saved to."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_output"
+    description: "In which .obsm slot to store the resulting integrated embedding."
+    info: null
+    default:
+    - "X_scanvi_integrated"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_output_predictions"
+    description: "In which .obs slot to store the predicted labels."
+    info: null
+    default:
+    - "scanvi_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_output_probabilities"
+    description: "In which .obs slot to store the probabilities of the predicted labels."
+    info: null
+    default:
+    - "scanvi_proba"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "scANVI training arguments"
+  arguments:
+  - type: "boolean"
+    name: "--early_stopping"
+    description: "Whether to perform early stopping with respect to the validation\
+      \ set."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--early_stopping_monitor"
+    description: "Metric logged during validation set epoch."
+    info: null
+    default:
+    - "elbo_validation"
+    required: false
+    choices:
+    - "elbo_validation"
+    - "reconstruction_loss_validation"
+    - "kl_local_validation"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--early_stopping_patience"
+    description: "Number of validation epochs with no improvement after which training\
+      \ will be stopped."
+    info: null
+    default:
+    - 45
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--early_stopping_min_delta"
+    description: "Minimum change in the monitored quantity to qualify as an improvement,\
+      \ i.e. an absolute change of less than min_delta, will count as no improvement."
+    info: null
+    default:
+    - 0.0
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_epochs"
+    description: "Number of passes through the dataset, defaults to (20000 / number\
+      \ of cells) * 400 or 400; whichever is smallest."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--reduce_lr_on_plateau"
+    description: "Whether to monitor validation loss and reduce learning rate when\
+      \ validation set `lr_scheduler_metric` plateaus."
+    info: null
+    default:
+    - true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--lr_factor"
+    description: "Factor to reduce learning rate."
+    info: null
+    default:
+    - 0.6
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--lr_patience"
+    description: "Number of epochs with no improvement after which learning rate will\
+      \ be reduced."
+    info: null
+    default:
+    - 30.0
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "scANVI (single-cell ANnotation using Variational Inference) is a semi-supervised model for single-cell transcriptomics\
+  \ data. scANVI is an scVI extension that can leverage the cell type knowledge for\
+  \ a subset of the cells present in the data sets to infer the states of the rest\
+  \ of the cells.\nThis component will instantiate a scANVI model from a pre-trained\
+  \ scVI model, integrate the data and perform label prediction.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "scvi_model"
+- type: "file"
+  path: "TS_Blood_filtered.h5mu"
+- type: "file"
+  path: "pbmc_1k_protein_v3_mms.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "midcpu"
+    - "midmem"
+    - "gpu"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "nvcr.io/nvidia/pytorch:25.05-py3"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "jax[cuda]"
+    - "scvi-tools~=1.3.1"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/scanvi/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/scanvi"
+  executable: "target/executable/annotate/scanvi/scanvi"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/scanvi/compress_h5mu.py
+++ b/target/executable/annotate/scanvi/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    """Copy the HDF5 file `input_path` to `output_path`, compressing its datasets.
+
+    Groups, datasets and their attributes are re-created in `output_path`; datasets that are
+    uncompressed and non-scalar get `compression` applied, all others are copied unchanged.
+    The text header in the first 512 bytes (the HDF5 user block) is carried over as well.
+    """
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(output_h5: H5File, compression: str, name: str, object: Union[Group, Dataset]):
+        # Callback for `visititems`: mirror one group or dataset of the input into `output_h5`.
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects don't have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        # Null-padded up to 512 bytes: read the whole user block, cut at the first null (if any).
+        starting_metadata = input_bytes.read(512).partition(b"\x00")[0]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    """Copy `h5mu` to `output_file`, then write `modality_data` into it under `modality_name`.
+
+    When `output_compression` is set, the copy is made as `<stem>_uncompressed.h5mu` next to
+    `output_file`, compressed into `output_file`, and the intermediate file is deleted.
+    """
+    destination, source = Path(output_file), Path(h5mu)
+    staging = destination
+    if output_compression:
+        staging = destination.with_name(destination.stem + "_uncompressed.h5mu")
+    shutil.copyfile(source, staging)
+    write_h5ad(filename=staging, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(staging, destination, compression=output_compression)
+        staging.unlink()
--- a/target/executable/annotate/scanvi/nextflow_labels.config
+++ b/target/executable/annotate/scanvi/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/scanvi/scanvi
+++ b/target/executable/annotate/scanvi/scanvi
--- a/target/executable/annotate/scanvi/set_var_index.py
+++ b/target/executable/annotate/scanvi/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Strip trailing numeric suffixes from gene names and use them as the `.var` index.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object; its `.var` index is overwritten in place.
+    var_name : str | None
+        Column of `adata.var` holding the gene names. When None (or empty), the current index is used as the source.
+
+    Returns
+    -------
+    AnnData
+        The same `adata` object that was passed in (not a copy).
+    """
+    # A trailing ".<digits>" is removed from every name, e.g. "GENE.2" -> "GENE".
+    raw_names = adata.var[var_name] if var_name else adata.var.index
+    adata.var.index = [re.sub("\\.[0-9]+$", "", name) for name in raw_names]
+
+    return adata
--- a/target/executable/annotate/scanvi/setup_logger.py
+++ b/target/executable/annotate/scanvi/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Attach an INFO-level stdout handler to the root logger and return that logger."""
+    import logging
+    import sys
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)-8s %(message)s"))
+    # getLogger() without a name is the root logger; every call adds one more handler.
+    root = logging.getLogger()
+    root.setLevel(logging.INFO)
+    root.addHandler(handler)
+    return root
--- a/target/executable/annotate/scanvi/subset_vars.py
+++ b/target/executable/annotate/scanvi/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Subset AnnData object on highly variable genes
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    subset_col : str
+        Name of the boolean column in `adata.var` that contains the information if features should be used or not
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    if adata.var[subset_col].dtype == "boolean":
+        assert adata.var[subset_col].isna().sum() == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+
+    assert adata.var[subset_col].dtype == "bool", (
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+    )
+
+    return adata[:, adata.var[subset_col]].copy()
--- a/target/executable/annotate/singler/.config.vsh.yaml
+++ b/target/executable/annotate/singler/.config.vsh.yaml
@@ -0,0 +1,519 @@
+name: "singler"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Dorien Roosen"
+  roles:
+  - "author"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dorien@data-intuitive.com"
+      github: "dorien-er"
+      linkedin: "dorien-roosen"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Weiwei Schultz"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    organizations:
+    - name: "Janssen R&D US"
+      role: "Associate Director Data Sciences"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data containing log normalized counts to\
+      \ be used for cell type annotation if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata .var column in the input data containing gene\
+      \ names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_obs_clusters"
+    description: "The name of the adata .obs column containing cluster identities\
+      \ of the observations. \nIf provided, annotation is performed on the aggregated\
+      \ cluster profiles, \notherwise it defaults to annotation per observation.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data with known cell type labels that the query\
+      \ data is annotated against. Should be a .h5mu file."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data containing log normalized counts to\
+      \ be used for cell type annotation if .X is not to be used. Data are expected\
+      \ to be processed in the same way as the --input query dataset."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "The name of the adata obs column in the reference data containing\
+      \ cell type annotations."
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing a boolean mask corresponding to genes to\
+      \ be used for marker selection. By default, do not subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Arguments"
+  description: "Arguments related to the training of and classification with the SingleR\
+    \ model"
+  arguments:
+  - type: "integer"
+    name: "--de_n_genes"
+    description: "The number of differentially expressed genes across labels to be\
+      \ calculated from the reference.\nDefaults to 500 * (2/3) ^ log2(N) where N\
+      \ is the number of unique labels when `--de_method` is set to `classic`,\n\
+      otherwise, defaults to 10.\n"
+    info: null
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--de_method"
+    description: "Method to detect differentially expressed genes between pairs of\
+      \ labels."
+    info: null
+    default:
+    - "classic"
+    required: false
+    choices:
+    - "classic"
+    - "t"
+    - "wilcox"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--quantile"
+    description: "The quantile of the correlation distribution to use to compute the\
+      \ score per label."
+    info: null
+    default:
+    - 0.8
+    required: false
+    min: 0.0
+    max: 1.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--fine_tune"
+    description: "Whether finetuning should be performed to improve the resolution.\
+      \ \nIf set to True, an additional finetuning step is performed after initial\
+      \ classification, \nnew marker genes are calculated based on all cells with\
+      \ a score higher than the maximum score minus `--fine_tuning_threshold`,\nand\
+      \ the calculation of the scores is repeated.\n"
+    info: null
+    default:
+    - true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--fine_tuning_threshold"
+    description: "The maximum difference from the maximum correlation to use in fine-tuning\n"
+    info: null
+    default:
+    - 0.05
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--prune"
+    description: "Whether label pruning should be performed. If set to True, an additional\
+      \ output .obs field `--output_obs_pruned_predictions` will be added to the `--output`,\
+      \ containing labels where 'low-quality' labels are replaced with NA's. Labels\
+      \ are considered 'low-quality' when their delta score (stored in `--output_obs_delta_next`)\
+      \ falls more than 3 median absolute deviations below the median for that label\
+      \ type."
+    info: null
+    default:
+    - true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_predictions"
+    description: "In which `.obs` slot to store the predicted labels. If `--fine_tune\
+      \ False`, this is based only on the maximum entry in `--output_obsm_scores`.\n"
+    info: null
+    default:
+    - "singler_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predicted\
+      \ labels.\n"
+    info: null
+    default:
+    - "singler_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_delta_next"
+    description: "In which `.obs` slot to store the delta between the best and next-best\
+      \ score. If `--fine_tune True`, this is reported for scores after fine-tuning.\n"
+    info: null
+    default:
+    - "singler_delta_next"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_pruned_predictions"
+    description: "In which `.obs` slot to store the pruned labels, where low-quality\
+      \ labels are replaced with NA's. Only added if `--prune True`.\n"
+    info: null
+    default:
+    - "singler_pruned_labels"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obsm_scores"
+    description: "In which `.obsm` slot to store the matrix of prediction correlations\
+      \ at the specified quantile for each label (column) in each cell (row).\n"
+    info: null
+    default:
+    - "singler_scores"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "SingleR performs reference-based cell type annotation for single-cell\
+  \ RNA-seq data \nby computing Spearman correlations between test cells and reference\
+  \ samples with known labels, \nusing marker genes to assign the most similar cell\
+  \ type label to each new cell.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "TS_Blood_filtered.h5mu"
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "lowcpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "rocker/r2u:22.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    env:
+    - "RETICULATE_PYTHON=/usr/bin/python"
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "python3"
+    - "python3-pip"
+    - "python3-dev"
+    - "python-is-python3"
+    interactive: false
+  - type: "r"
+    cran:
+    - "anndata"
+    - "reticulate"
+    - "SingleR"
+    bioc:
+    - "scrapper"
+    - "bit64"
+    bioc_force_install: false
+    warnings_as_errors: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/singler/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/singler"
+  executable: "target/executable/annotate/singler/singler"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/singler/nextflow_labels.config
+++ b/target/executable/annotate/singler/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  // (memory grows with the attempt number: 2 GB, 4 GB, ...)
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  // (exit status 137 through 140); any other exit status terminates the run.
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  // Each label requests its base amount multiplied by the attempt number. When
+  // resourceLimits.memory is configured and the attempt number has reached
+  // maxRetries, the configured limit itself is requested instead.
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  // Every disk label passes process.disk through when it is set and yields
+  // null otherwise, so the labels themselves add no disk requirement.
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/singler/singler
+++ b/target/executable/annotate/singler/singler
--- a/target/executable/annotate/svm_annotation/.config.vsh.yaml
+++ b/target/executable/annotate/svm_annotation/.config.vsh.yaml
@@ -0,0 +1,440 @@
+name: "svm_annotation"
+namespace: "annotate"
+version: "main"
+authors:
+- name: "Jakub Majercik"
+  roles:
+  - "author"
+  info:
+    role: "Contributor"
+    links:
+      email: "jakub@data-intuitive.com"
+      github: "jakubmajercik"
+      linkedin: "jakubmajercik"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatics Engineer"
+argument_groups:
+- name: "Inputs"
+  description: "Input dataset (query) arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "The input (query) data to be labeled. Should be a .h5mu file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_layer"
+    description: "The layer in the input data to be used for cell type annotation\
+      \ if .X is not to be used."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--input_var_gene_names"
+    description: "The name of the adata var column in the input data containing gene\
+      \ names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--input_reference_gene_overlap"
+    description: "The minimum number of genes present in both the reference and query\
+      \ datasets.\n"
+    info: null
+    default:
+    - 100
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Reference"
+  description: "Arguments related to the reference dataset."
+  arguments:
+  - type: "file"
+    name: "--reference"
+    description: "The reference data to train the SVM classifier on. Only\
+      \ required if a pre-trained --model is not provided."
+    info: null
+    example:
+    - "reference.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_layer"
+    description: "The layer in the reference data to be used for cell type annotation\
+      \ if .X is not to be used. Data are expected to be processed in the same way\
+      \ as the --input query dataset."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_obs_target"
+    description: "Key in .obs attribute of reference modality with cell-type information.\n"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_gene_names"
+    description: "The name of the adata var column in the reference data containing\
+      \ gene names; when no gene_name_layer is provided, the var index will be used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--reference_var_input"
+    description: ".var column containing highly variable genes. By default, do not\
+      \ subset genes.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "Output arguments."
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_prediction"
+    description: "In which `.obs` slots to store the predicted information.\n"
+    info: null
+    default:
+    - "svm_pred"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_obs_probability"
+    description: "In which `.obs` slots to store the probability of the predictions.\n"
+    info: null
+    default:
+    - "svm_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Model arguments"
+  description: "Model arguments."
+  arguments:
+  - type: "file"
+    name: "--model"
+    description: "Pretrained model in pkl format. If not provided, the model will\
+      \ be trained on the reference data and --reference should be provided."
+    info: null
+    example:
+    - "pretrained_model.pkl"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean"
+    name: "--feature_selection"
+    description: "Whether to perform feature selection."
+    info: null
+    default:
+    - true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_iter"
+    description: "Maximum number of iterations for the SVM."
+    info: null
+    default:
+    - 5000
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--c_reg"
+    description: "Regularization parameter for the SVM."
+    info: null
+    default:
+    - 1.0
+    required: false
+    min: 0.0
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--class_weight"
+    description: "Class weights for the SVM. The `uniform` mode gives all classes\
+      \ a weight of one. \nThe `balanced` mode (default) uses the values of y to\
+      \ automatically adjust weights inversely \nproportional to class frequencies\
+      \ in the input data as n_samples / (n_classes * np.bincount(y)).\n"
+    info: null
+    default:
+    - "balanced"
+    required: false
+    choices:
+    - "balanced"
+    - "uniform"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "cross_check_genes.py"
+- type: "file"
+  path: "subset_vars.py"
+- type: "file"
+  path: "set_var_index.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Automated cell type annotation tool for scRNA-seq datasets on the basis\
+  \ of SVMs."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "annotation_test_data"
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "highmem"
+    - "highdisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "scikit-learn==1.5.2"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/annotate/svm_annotation/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/annotate/svm_annotation"
+  executable: "target/executable/annotate/svm_annotation/svm_annotation"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/annotate/svm_annotation/cross_check_genes.py
+++ b/target/executable/annotate/svm_annotation/cross_check_genes.py
@@ -0,0 +1,26 @@
+from typing import List
+
+
+def cross_check_genes(
+    query_genes: List[str], reference_genes: List[str], min_gene_overlap: int = 100
+) -> List[str]:
+    """Cross check the overlap between two lists of genes
+
+    Parameters
+    ----------
+    query_genes : List[str]
+        List of gene names
+    reference_genes : List[str]
+       List of gene names
+
+    Returns
+    -------
+    List[str]
+        List of overlapping genes
+    """
+    common_ens_ids = list(set(reference_genes).intersection(set(query_genes)))
+    assert len(common_ens_ids) >= min_gene_overlap, (
+        f"The intersection of genes between the query and reference dataset is too small, expected at least {min_gene_overlap}."
+    )
+
+    return common_ens_ids
--- a/target/executable/annotate/svm_annotation/nextflow_labels.config
+++ b/target/executable/annotate/svm_annotation/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/annotate/svm_annotation/set_var_index.py
+++ b/target/executable/annotate/svm_annotation/set_var_index.py
@@ -0,0 +1,24 @@
+import anndata as ad
+import re
+
+
+def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
+    """Sanitize gene names and set the index of the .var DataFrame.
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    var_name : str | None
+        Name of the column in `adata.var` that contains the gene names, if None, the existing index will be sanitized but not replaced.
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with sanitized and replaced index
+    """
+    if var_name:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var[var_name]]
+    else:
+        adata.var.index = [re.sub("\\.[0-9]+$", "", s) for s in adata.var.index]
+    return adata
--- a/target/executable/annotate/svm_annotation/setup_logger.py
+++ b/target/executable/annotate/svm_annotation/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/annotate/svm_annotation/subset_vars.py
+++ b/target/executable/annotate/svm_annotation/subset_vars.py
@@ -0,0 +1,31 @@
+def subset_vars(adata, subset_col):
+    """Subset AnnData object on highly variable genes
+
+    Parameters
+    ----------
+    adata : AnnData
+        Annotated data object
+    subset_col : str
+        Name of the boolean column in `adata.var` that contains the information if features should be used or not
+
+    Returns
+    -------
+    AnnData
+        Copy of `adata` with subsetted features
+    """
+    if subset_col not in adata.var.columns:
+        raise ValueError(
+            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
+        )
+
+    if adata.var[subset_col].dtype == "boolean":
+        assert adata.var[subset_col].isna().sum() == 0, (
+            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
+        )
+        adata.var[subset_col] = adata.var[subset_col].astype("bool")
+
+    assert adata.var[subset_col].dtype == "bool", (
+        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {adata.var[subset_col].dtype}. Can not subset data."
+    )
+
+    return adata[:, adata.var[subset_col]].copy()
--- a/target/executable/annotate/svm_annotation/svm_annotation
+++ b/target/executable/annotate/svm_annotation/svm_annotation
--- a/target/executable/cluster/leiden/.config.vsh.yaml
+++ b/target/executable/cluster/leiden/.config.vsh.yaml
@@ -0,0 +1,305 @@
+name: "leiden"
+namespace: "cluster"
+version: "main"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsp_connectivities"
+    description: "In which .obsp slot the neighbor connectivities can be found."
+    info: null
+    default:
+    - "connectivities"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_name"
+    description: "Name of the .obsm key under which to add the cluster labels.\nThe\
+      \ name of the columns in the matrix will correspond to the resolutions.\n"
+    info: null
+    default:
+    - "leiden"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--resolution"
+    description: "A parameter value controlling the coarseness of the clustering.\
+      \ Higher values lead to more clusters.\nMultiple values will result in clustering\
+      \ being performed multiple times.\n"
+    info: null
+    default:
+    - 1.0
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Cluster cells using the [Leiden algorithm] [Traag18] implemented in\
+  \ the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain\
+  \ algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15]\
+  \ [Levine15]. \nThis requires having run `neighbors/find_neighbors` or `neighbors/bbknn`\
+  \ first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in\
+  \ large networks, J. Stat. Mech.  \n[Levine15]: Levine et al. (2015), Data-Driven\
+  \ Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with\
+  \ Prognosis, Cell.  \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing\
+  \ well-connected communities arXiv.  \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale\
+  \ single-cell gene expression data analysis, Genome Biology.  \n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "highcpu"
+    - "midmem"
+    - "middisk"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.13-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    - "leidenalg~=0.10.0"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/cluster/leiden/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/cluster/leiden"
+  executable: "target/executable/cluster/leiden/leiden"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/cluster/leiden/compress_h5mu.py
+++ b/target/executable/cluster/leiden/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/executable/cluster/leiden/leiden
+++ b/target/executable/cluster/leiden/leiden
--- a/target/executable/cluster/leiden/nextflow_labels.config
+++ b/target/executable/cluster/leiden/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/cluster/leiden/setup_logger.py
+++ b/target/executable/cluster/leiden/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/compression/compress_h5mu/.config.vsh.yaml
+++ b/target/executable/compression/compress_h5mu/.config.vsh.yaml
@@ -0,0 +1,245 @@
+name: "compress_h5mu"
+namespace: "compression"
+version: "main"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Path to the input .h5mu."
+    info: null
+    example:
+    - "sample_path"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    description: "location of output file."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "compress_h5mu.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Compress a MuData file. \n"
+test_resources:
+- type: "python_script"
+  path: "run_test.py"
+  is_executable: true
+- type: "file"
+  path: "e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "singlecpu"
+    - "lowmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.10-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/compression/compress_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/compression/compress_h5mu"
+  executable: "target/executable/compression/compress_h5mu/compress_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/compression/compress_h5mu/compress_h5mu
+++ b/target/executable/compression/compress_h5mu/compress_h5mu
--- a/target/executable/compression/compress_h5mu/compress_h5mu.py
+++ b/target/executable/compression/compress_h5mu/compress_h5mu.py
@@ -0,0 +1,87 @@
+import shutil
+from anndata import AnnData
+from mudata import write_h5ad
+from h5py import File as H5File
+from h5py import Group, Dataset
+from pathlib import Path
+from typing import Union, Literal
+from functools import partial
+
+
+def compress_h5mu(
+    input_path: Union[str, Path],
+    output_path: Union[str, Path],
+    compression: Union[Literal["gzip"], Literal["lzf"]],
+):
+    input_path, output_path = str(input_path), str(output_path)
+
+    def copy_attributes(in_object, out_object):
+        for key, value in in_object.attrs.items():
+            out_object.attrs[key] = value
+
+    def visit_path(
+        output_h5: H5File,
+        compression: Union[Literal["gzip"], Literal["lzf"]],
+        name: str,
+        object: Union[Group, Dataset],
+    ):
+        if isinstance(object, Group):
+            new_group = output_h5.create_group(name)
+            copy_attributes(object, new_group)
+        elif isinstance(object, Dataset):
+            # Compression only works for non-scalar Dataset objects
+            # Scalar objects dont have a shape defined
+            if not object.compression and object.shape not in [None, ()]:
+                new_dataset = output_h5.create_dataset(
+                    name, data=object, compression=compression
+                )
+                copy_attributes(object, new_dataset)
+            else:
+                output_h5.copy(object, name)
+        else:
+            raise NotImplementedError(
+                f"Could not copy element {name}, "
+                f"type has not been implemented yet: {type(object)}"
+            )
+
+    with (
+        H5File(input_path, "r") as input_h5,
+        H5File(output_path, "w", userblock_size=512) as output_h5,
+    ):
+        copy_attributes(input_h5, output_h5)
+        input_h5.visititems(partial(visit_path, output_h5, compression))
+
+    with open(input_path, "rb") as input_bytes:
+        # Mudata puts metadata like this in the first 512 bytes:
+        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
+        # See mudata/_core/io.py, read_h5mu() function
+        starting_metadata = input_bytes.read(100)
+        # The metadata is padded with extra null bytes up until 512 bytes
+        truncate_location = starting_metadata.find(b"\x00")
+        starting_metadata = starting_metadata[:truncate_location]
+    with open(output_path, "br+") as f:
+        nbytes = f.write(starting_metadata)
+        f.write(b"\0" * (512 - nbytes))
+
+
+def write_h5ad_to_h5mu_with_compression(
+    output_file: Union[str, Path],
+    h5mu: Union[str, Path],
+    modality_name: str,
+    modality_data: AnnData,
+    output_compression=None,
+):
+    output_file = Path(output_file)
+    h5mu = Path(h5mu)
+    output_file_uncompressed = (
+        output_file.with_name(output_file.stem + "_uncompressed.h5mu")
+        if output_compression
+        else output_file
+    )
+    shutil.copyfile(h5mu, output_file_uncompressed)
+    write_h5ad(filename=output_file_uncompressed, mod=modality_name, data=modality_data)
+    if output_compression:
+        compress_h5mu(
+            output_file_uncompressed, output_file, compression=output_compression
+        )
+        output_file_uncompressed.unlink()
--- a/target/executable/compression/compress_h5mu/nextflow_labels.config
+++ b/target/executable/compression/compress_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/compression/tar_extract/.config.vsh.yaml
+++ b/target/executable/compression/tar_extract/.config.vsh.yaml
@@ -0,0 +1,219 @@
+name: "tar_extract"
+namespace: "compression"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input file"
+    info: null
+    example:
+    - "input.tar.gz"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Folder to restore file(s) to."
+    info: null
+    example:
+    - "output_folder"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--strip_components"
+    alternatives:
+    - "-s"
+    description: "Strip this amount of leading components from file names on extraction.\
+      \ For example, to extract only 'myfile.txt' from an archive containing the structure\
+      \ 'this/goes/deep/myfile.txt', use 3 to strip 'this/goes/deep/'."
+    info: null
+    example:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--exclude"
+    alternatives:
+    - "-e"
+    description: "Prevents any file or member whose name matches the shell wildcard\
+      \ (pattern) from being extracted."
+    info: null
+    example:
+    - "docs/figures"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "bash_script"
+  path: "script.sh"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Extract files from a tar archive"
+test_resources:
+- type: "bash_script"
+  path: "test.sh"
+  is_executable: true
+- type: "file"
+  path: "LICENSE"
+info: null
+status: "deprecated"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "singlecpu"
+    - "lowmem"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "ubuntu:latest"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/compression/tar_extract/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/compression/tar_extract"
+  executable: "target/executable/compression/tar_extract/tar_extract"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/compression/tar_extract/nextflow_labels.config
+++ b/target/executable/compression/tar_extract/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/compression/tar_extract/tar_extract
+++ b/target/executable/compression/tar_extract/tar_extract
--- a/target/executable/convert/from_10xh5_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/from_10xh5_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,352 @@
+name: "from_10xh5_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Robrecht Cannoodt"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "A 10x h5 file as generated by Cell Ranger."
+    info: null
+    example:
+    - "raw_feature_bc_matrix.h5"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--input_metrics_summary"
+    description: "A metrics summary csv file as generated by Cell Ranger."
+    info: null
+    example:
+    - "metrics_summary.csv"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info:
+      slots:
+        mod:
+        - name: "rna"
+          required: true
+          description: "Gene expression counts."
+          slots:
+            var:
+            - name: "gene_symbol"
+              type: "string"
+              description: "Identification of the gene."
+              required: true
+            - name: "feature_types"
+              type: "string"
+              description: "The full name of the modality."
+              required: true
+            - name: "genome"
+              type: "string"
+              description: "Reference that was used to generate the data."
+              required: true
+        - name: "prot"
+          required: false
+          description: "Protein abundance"
+          slots:
+            var:
+            - name: "gene_symbol"
+              type: "string"
+              description: "Identification of the gene."
+              required: true
+            - name: "feature_types"
+              type: "string"
+              description: "The full name of the modality."
+              required: true
+            - name: "genome"
+              type: "string"
+              description: "Reference that was used to generate the data."
+              required: true
+        - name: "vdj"
+          required: false
+          description: "VDJ transcript counts"
+          slots:
+            var:
+            - name: "gene_symbol"
+              type: "string"
+              description: "Identification of the gene."
+              required: true
+            - name: "feature_types"
+              type: "string"
+              description: "The full name of the modality."
+              required: true
+            - name: "genome"
+              type: "string"
+              description: "Reference that was used to generate the data."
+              required: true
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--uns_metrics"
+    description: "Name of the .uns slot under which to store QC metrics (if any)."
+    info: null
+    default:
+    - "metrics_cellranger"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Arguments"
+  arguments:
+  - type: "integer"
+    name: "--min_genes"
+    description: "Minimum number of genes expressed required for a cell to pass filtering."
+    info: null
+    example:
+    - 100
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--min_counts"
+    description: "Minimum number of counts required for a cell to pass filtering."
+    info: null
+    example:
+    - 1000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts a 10x h5 into an h5mu file.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_10xh5_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_10xh5_to_h5mu"
+  executable: "target/executable/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu
+++ b/target/executable/convert/from_10xh5_to_h5mu/from_10xh5_to_h5mu
--- a/target/executable/convert/from_10xh5_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/from_10xh5_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_10xh5_to_h5mu/setup_logger.py
+++ b/target/executable/convert/from_10xh5_to_h5mu/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
--- a/target/executable/convert/from_10xmtx_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/from_10xmtx_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,253 @@
+name: "from_10xmtx_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Robrecht Cannoodt"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input mtx folder"
+    info: null
+    example:
+    - "input_dir_containing_gz_files"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts a 10x mtx into an h5mu file.\n"
+test_resources:
+- type: "python_script"
+  path: "run_test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_10xmtx_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_10xmtx_to_h5mu"
+  executable: "target/executable/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu
+++ b/target/executable/convert/from_10xmtx_to_h5mu/from_10xmtx_to_h5mu
--- a/target/executable/convert/from_10xmtx_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/from_10xmtx_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing; memory grows with each attempt
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry only for exit codes 137..140 (treated here as memory-related); any other exit status uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources: a fixed number of CPUs per label
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: base amount multiplied by task.attempt; when resourceLimits.memory is set and task.attempt has reached task.maxRetries, that limit is requested instead
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes process.disk through when it is set, and is null otherwise
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The disk labels above intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_10xmtx_to_h5mu/setup_logger.py
+++ b/target/executable/convert/from_10xmtx_to_h5mu/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO and above to stdout, then return it.
+    import logging
+    from sys import stdout
+    # getLogger() without a name returns the root logger, so this configuration is process-wide.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # level name is left-aligned, width 8
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+    # NOTE: every call adds one more stdout handler to the root logger; calling this twice duplicates each log line.
+    return logger
--- a/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml
+++ b/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/.config.vsh.yaml
@@ -0,0 +1,228 @@
+name: "from_bd_to_10x_molecular_barcode_tags"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input SAM or BAM file."
+    info: null
+    example:
+    - "input.bam"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output alignment file."
+    info: null
+    example:
+    - "output.sam"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--bam"
+    description: "Output a BAM file."
+    info: null
+    direction: "input"
+  - type: "integer"
+    name: "--threads"
+    alternatives:
+    - "-t"
+    description: "Number of threads"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "bash_script"
+  path: "script.sh"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Convert the molecular barcode sequence SAM tag from BD format (MA) to\
+  \ 10X format (UB).\n"
+test_resources:
+- type: "bash_script"
+  path: "run_test.sh"
+  is_executable: true
+- type: "file"
+  path: "output_raw"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "ubuntu:latest"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "samtools"
+    interactive: false
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_bd_to_10x_molecular_barcode_tags/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_bd_to_10x_molecular_barcode_tags"
+  executable: "target/executable/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags
+++ b/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/from_bd_to_10x_molecular_barcode_tags
--- a/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_labels.config
+++ b/target/executable/convert/from_bd_to_10x_molecular_barcode_tags/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing; memory grows with each attempt
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry only for exit codes 137..140 (treated here as memory-related); any other exit status uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources: a fixed number of CPUs per label
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: base amount multiplied by task.attempt; when resourceLimits.memory is set and task.attempt has reached task.maxRetries, that limit is requested instead
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes process.disk through when it is set, and is null otherwise
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The disk labels above intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_bdrhap_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/from_bdrhap_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,266 @@
+name: "from_bdrhap_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dorien Roosen"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dorien@data-intuitive.com"
+      github: "dorien-er"
+      linkedin: "dorien-roosen"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Robrecht Cannoodt"
+  roles:
+  - "author"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Inputs"
+  arguments:
+  - type: "string"
+    name: "--id"
+    description: "A sample ID."
+    info: null
+    example:
+    - "my_id"
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "The output h5mu of a BD Rhapsody workflow."
+    info: null
+    example:
+    - "sample.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output h5mu file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Convert the output of a BD Rhapsody pipeline v2.x to a MuData h5 file.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "sample.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_bdrhap_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_bdrhap_to_h5mu"
+  executable: "target/executable/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu
+++ b/target/executable/convert/from_bdrhap_to_h5mu/from_bdrhap_to_h5mu
--- a/target/executable/convert/from_bdrhap_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/from_bdrhap_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing; memory grows with each attempt
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry only for exit codes 137..140 (treated here as memory-related); any other exit status uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources: a fixed number of CPUs per label
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: base amount multiplied by task.attempt; when resourceLimits.memory is set and task.attempt has reached task.maxRetries, that limit is requested instead
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes process.disk through when it is set, and is null otherwise
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The disk labels above intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/from_cellranger_multi_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,284 @@
+name: "from_cellranger_multi_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input folder. Must contain the output from a cellranger multi run."
+    info: null
+    example:
+    - "input_dir_containing_modalities"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Locations for the output files. Must contain a wildcard (*) character,\n\
+      which will be replaced with the sample name.\n"
+    info: null
+    example:
+    - "*.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--sample_csv"
+    description: "CSV file describing the sample name per output file"
+    info: null
+    example:
+    - "samples.csv"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--uns_metrics"
+    description: "Name of the .uns slot under which to store QC metrics (if any)."
+    info: null
+    default:
+    - "metrics_cellranger"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts the output from cellranger multi to a single .h5mu file.\n\
+  By default, will map the following library type names to modality names:\n  - Gene\
+  \ Expression: rna\n  - Peaks: atac\n  - Antibody Capture: prot\n  - VDJ: vdj\n \
+  \ - VDJ-T: vdj_t\n  - VDJ-B: vdj_b\n  - CRISPR Guide Capture: crispr\n  - Multiplexing\
+  \ Capture: hashing\n  \nOther library types have their whitespace removed and dashes\
+  \ replaced by\nunderscores to generate the modality name.\n\nCurrently does not\
+  \ allow parsing the output from cell barcode demultiplexing.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "10x_5k_anticmv"
+- type: "file"
+  path: "10x_5k_lung_crispr"
+- type: "file"
+  path: "10x_5k_beam"
+- type: "file"
+  path: "10x_5k_fixed"
+- type: "file"
+  path: "10x_4plex_dtc"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    - "scirpy~=0.12.0"
+    - "pandas~=2.2.3"
+    - "pytest"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_cellranger_multi_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_cellranger_multi_to_h5mu"
+  executable: "target/executable/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu
+++ b/target/executable/convert/from_cellranger_multi_to_h5mu/from_cellranger_multi_to_h5mu
--- a/target/executable/convert/from_cellranger_multi_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/from_cellranger_multi_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing (memory scales with the attempt number)
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes 137-140, which have something to do with memory issues; terminate on any other exit code
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: base amount * attempt, or resourceLimits.memory once attempt >= maxRetries (only when both are set)
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes process.disk through when it is set, otherwise null
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_cellranger_multi_to_h5mu/setup_logger.py
+++ b/target/executable/convert/from_cellranger_multi_to_h5mu/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO-and-above records to stdout and return it.
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name is passed, so this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # timestamp, level name padded to 8 chars, message
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE(review): a new handler is attached on every call; calling this twice would presumably duplicate each log line
+
+    return logger
--- a/target/executable/convert/from_h5ad_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/from_h5ad_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,261 @@
+name: "from_h5ad_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dries De Maeyer"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "ddemaeyer@gmail.com"
+      github: "ddemaeyer"
+      linkedin: "dries-de-maeyer-b46a814"
+    organizations:
+    - name: "Janssen Pharmaceuticals"
+      href: "https://www.janssen.com"
+      role: "Principal Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5ad files"
+    info: null
+    default:
+    - "input.h5ad"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "List of names to use for the modalities. Will be used as the keys\
+      \ in the .mod attribute in the output MuData object\nThe number of items provided\
+      \ for this argument must equal the number of input files (--input) and their order\
+      \ should match.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output MuData file."
+    info: null
+    default:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts a single layer h5ad file into a single MuData object\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5ad_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5ad_to_h5mu"
+  executable: "target/executable/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu
+++ b/target/executable/convert/from_h5ad_to_h5mu/from_h5ad_to_h5mu
--- a/target/executable/convert/from_h5ad_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/from_h5ad_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing (memory scales with the attempt number)
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes 137-140, which have something to do with memory issues; terminate on any other exit code
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: base amount * attempt, or resourceLimits.memory once attempt >= maxRetries (only when both are set)
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes process.disk through when it is set, otherwise null
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_h5ad_to_h5mu/setup_logger.py
+++ b/target/executable/convert/from_h5ad_to_h5mu/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO-and-above records to stdout and return it.
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name is passed, so this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # timestamp, level name padded to 8 chars, message
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE(review): a new handler is attached on every call; calling this twice would presumably duplicate each log line
+
+    return logger
--- a/target/executable/convert/from_h5ad_to_seurat/.config.vsh.yaml
+++ b/target/executable/convert/from_h5ad_to_seurat/.config.vsh.yaml
@@ -0,0 +1,242 @@
+name: "from_h5ad_to_seurat"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Robrecht Cannoodt"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5ad file"
+    info: null
+    example:
+    - "input.h5ad"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--assay"
+    description: "Name of the assay to be created."
+    info: null
+    default:
+    - "RNA"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output Seurat file"
+    info: null
+    example:
+    - "output.rds"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts an h5ad file into a Seurat file.\n"
+test_resources:
+- type: "r_script"
+  path: "test.R"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5ad"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "rocker/r2u:24.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "libgeos-dev"
+    interactive: false
+  - type: "r"
+    cran:
+    - "hdf5r"
+    - "Seurat"
+    - "SeuratObject"
+    github:
+    - "scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a"
+    bioc_force_install: false
+    warnings_as_errors: true
+  test_setup:
+  - type: "r"
+    cran:
+    - "testthat"
+    bioc_force_install: false
+    warnings_as_errors: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5ad_to_seurat/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5ad_to_seurat"
+  executable: "target/executable/convert/from_h5ad_to_seurat/from_h5ad_to_seurat"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5ad_to_seurat/from_h5ad_to_seurat
+++ b/target/executable/convert/from_h5ad_to_seurat/from_h5ad_to_seurat
--- a/target/executable/convert/from_h5ad_to_seurat/nextflow_labels.config
+++ b/target/executable/convert/from_h5ad_to_seurat/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_h5mu_or_h5ad_to_seurat/.config.vsh.yaml
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_seurat/.config.vsh.yaml
@@ -0,0 +1,251 @@
+name: "from_h5mu_or_h5ad_to_seurat"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dorien Roosen"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dorien@data-intuitive.com"
+      github: "dorien-er"
+      linkedin: "dorien-roosen"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5ad or h5mu file"
+    info: null
+    example:
+    - "input.h5ad"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Modality to be converted if the input file is an h5mu file."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--assay"
+    description: "Name of the assay to be created."
+    info: null
+    default:
+    - "RNA"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output Seurat file"
+    info: null
+    example:
+    - "output.rds"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts an h5ad file or a single modality of an h5mu file into a Seurat\
+  \ file.\n"
+test_resources:
+- type: "r_script"
+  path: "test.R"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "rocker/r2u:22.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "libgeos-dev"
+    - "hdf5-tools"
+    interactive: false
+  - type: "r"
+    cran:
+    - "anndata"
+    - "hdf5r"
+    - "Seurat"
+    - "SeuratObject"
+    github:
+    - "scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a"
+    bioc_force_install: false
+    warnings_as_errors: true
+  test_setup:
+  - type: "r"
+    cran:
+    - "testthat"
+    bioc_force_install: false
+    warnings_as_errors: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5mu_or_h5ad_to_seurat/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5mu_or_h5ad_to_seurat"
+  executable: "target/executable/convert/from_h5mu_or_h5ad_to_seurat/from_h5mu_or_h5ad_to_seurat"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5mu_or_h5ad_to_seurat/from_h5mu_or_h5ad_to_seurat
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_seurat/from_h5mu_or_h5ad_to_seurat
--- a/target/executable/convert/from_h5mu_or_h5ad_to_seurat/nextflow_labels.config
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_seurat/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/.config.vsh.yaml
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/.config.vsh.yaml
@@ -0,0 +1,428 @@
+name: "from_h5mu_or_h5ad_to_tiledb"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+argument_groups:
+- name: "Input"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "Input AnnData or MuData file. When an AnnData file is provided,\
+      \ it is automatically assumed to \ncontain transcriptome counts.\n"
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "RNA modality"
+  arguments:
+  - type: "string"
+    name: "--rna_modality"
+    description: "The name used for the RNA modality. Used when input file is a MuData\
+      \ object.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_raw_layer_input"
+    description: "Location of the layer containing the raw transcriptome counts. Layers\
+      \ are looked for in .layers,\nexcept when using the value 'X'; in which case\
+      \ .X is used.\n"
+    info: null
+    example:
+    - "X"
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_normalized_layer_input"
+    description: "Location of the layer containing the normalized counts. Layers are\
+      \ looked for in .layers,\nexcept when using the value 'X'; in which case .X\
+      \ is used.\n"
+    info: null
+    example:
+    - "log_normalized"
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_var_gene_names_input"
+    description: "Column in .var that provides the gene names. If not specified, the\
+      \ index from the input is used.\n"
+    info: null
+    example:
+    - "gene_symbol"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Protein modality"
+  arguments:
+  - type: "string"
+    name: "--prot_modality"
+    description: "The name used for the protein modality. Used when input file is\
+      \ a MuData object.\nWhen not specified, the protein modality will not be processed.\n"
+    info: null
+    example:
+    - "prot"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_raw_layer_input"
+    description: "Location of the layer containing the raw protein counts. Layers\
+      \ are looked for in .layers,\nexcept when using the value 'X'; in which case\
+      \ .X is used.\n"
+    info: null
+    example:
+    - "X"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_normalized_layer_input"
+    description: "Location of the layer containing the normalized counts. Layers are\
+      \ looked for in .layers,\nexcept when using the value 'X'; in which case .X\
+      \ is used.\n"
+    info: null
+    example:
+    - "clr"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Output slots"
+  arguments:
+  - type: "string"
+    name: "--rna_modality_output"
+    description: "TileDB Measurement name where the RNA modality will be stored.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_modality_output"
+    description: "Name of the TileDB Measurement where the protein modality will be\
+      \ stored.\n"
+    info: null
+    default:
+    - "prot"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_index_name_output"
+    description: "Name of the index that is used to describe the cells (observations).\n"
+    info: null
+    default:
+    - "cell_id"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_var_index_name_output"
+    description: "Output name of the index that is used to describe the genes.\n"
+    info: null
+    default:
+    - "rna_index"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_raw_layer_output"
+    description: "Output location for the raw transcriptomics counts.\n"
+    info: null
+    default:
+    - "X"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_normalized_layer_output"
+    description: "Output location for the normalized RNA counts.\n"
+    info: null
+    default:
+    - "log_normalized"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--rna_var_gene_names_output"
+    description: "Name of the .var column that specifies the gene names.\n"
+    info: null
+    default:
+    - "gene_symbol"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_var_index_name_output"
+    description: "Output name of the index that is used to describe the proteins.\
+      \ \n"
+    info: null
+    default:
+    - "prot_index"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_raw_layer_output"
+    description: "Output location for the raw protein counts.\n"
+    info: null
+    default:
+    - "X"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--prot_normalized_layer_output"
+    description: "Output location for the normalized protein counts.\n"
+    info: null
+    default:
+    - "log_normalized"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Output arguments"
+  arguments:
+  - type: "file"
+    name: "--tiledb_dir"
+    description: "Directory where the TileDB output will be written to.\n"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Convert a MuData or AnnData object to tiledb. Currently, transcriptome\
+  \ and protein modalities are supported.\n\nNOTE: The functionality provided by this\
+  \ component is experimental and may be subject to change. \n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "midmem"
+    - "midcpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "tiledbsoma"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5mu_or_h5ad_to_tiledb/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5mu_or_h5ad_to_tiledb"
+  executable: "target/executable/convert/from_h5mu_or_h5ad_to_tiledb/from_h5mu_or_h5ad_to_tiledb"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/from_h5mu_or_h5ad_to_tiledb
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/from_h5mu_or_h5ad_to_tiledb
--- a/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/nextflow_labels.config
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/setup_logger.py
+++ b/target/executable/convert/from_h5mu_or_h5ad_to_tiledb/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO-and-above records to stdout, then return it.
+    import logging  # imports are function-local; nothing is imported at module level
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name is passed, so this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)  # explicit stdout stream rather than StreamHandler's default (stderr)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # timestamp, level name left-justified to 8 chars, message
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE: a fresh handler is attached on every call, so calling this twice duplicates each log line
+
+    return logger
--- a/target/executable/convert/from_h5mu_to_h5ad/.config.vsh.yaml
+++ b/target/executable/convert/from_h5mu_to_h5ad/.config.vsh.yaml
@@ -0,0 +1,262 @@
+name: "from_h5mu_to_h5ad"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Robrecht Cannoodt"
+  roles:
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input MuData file"
+    info: null
+    default:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality from the input MuData file to process.\n"
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output AnnData file."
+    info: null
+    default:
+    - "output.h5ad"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts a h5mu file into a h5ad file.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  test_setup:
+  - type: "apt"
+    packages:
+    - "git"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy==0.8.0"
+    github:
+    - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5mu_to_h5ad/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5mu_to_h5ad"
+  executable: "target/executable/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad
+++ b/target/executable/convert/from_h5mu_to_h5ad/from_h5mu_to_h5ad
--- a/target/executable/convert/from_h5mu_to_h5ad/nextflow_labels.config
+++ b/target/executable/convert/from_h5mu_to_h5ad/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/from_h5mu_to_h5ad/setup_logger.py
+++ b/target/executable/convert/from_h5mu_to_h5ad/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO-and-above records to stdout, then return it.
+    import logging  # imports are function-local
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name passed, so this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)  # write to stdout instead of the default stderr
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")  # timestamp, level name padded to 8 chars, message
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE(review): no guard here; every call attaches one more handler to the root logger
+
+    return logger
--- a/target/executable/convert/from_h5mu_to_seurat/.config.vsh.yaml
+++ b/target/executable/convert/from_h5mu_to_seurat/.config.vsh.yaml
@@ -0,0 +1,237 @@
+name: "from_h5mu_to_seurat"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Robrecht Cannoodt"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file"
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Output Seurat file"
+    info: null
+    example:
+    - "output.rds"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Converts an h5mu file into a Seurat file.\n\nRestrictions:\n  - Only\
+  \ the intersection of cells is currently loaded into the Seurat object due to the\
+  \ object structure limitation.\n  - Multimodal embeddings (global .obsm slot) are\
+  \ loaded with the assay.used field set to the default assay.\n  - Embeddings names\
+  \ are changed in order to comply with R & Seurat requirements and conventions.\n\
+  \  - Feature names with underscores ('_') are automatically replaced with dashes\
+  \ ('-')\n  - Seurat does not support global variables metadata /var.\n"
+test_resources:
+- type: "r_script"
+  path: "run_test.R"
+  is_executable: true
+- type: "file"
+  path: "10x_5k_anticmv"
+info: null
+status: "deprecated"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "singlecpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "rocker/r2u:24.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libhdf5-dev"
+    - "libgeos-dev"
+    interactive: false
+  - type: "r"
+    cran:
+    - "anndata"
+    - "hdf5r"
+    - "testthat"
+    - "SeuratObject"
+    - "Seurat"
+    bioc_force_install: false
+    warnings_as_errors: true
+  - type: "r"
+    github:
+    - "pmbio/MuDataSeurat@empty-tables-and-nullable"
+    bioc_force_install: false
+    warnings_as_errors: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/convert/from_h5mu_to_seurat/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/from_h5mu_to_seurat"
+  executable: "target/executable/convert/from_h5mu_to_seurat/from_h5mu_to_seurat"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/from_h5mu_to_seurat/from_h5mu_to_seurat
+++ b/target/executable/convert/from_h5mu_to_seurat/from_h5mu_to_seurat
--- a/target/executable/convert/from_h5mu_to_seurat/nextflow_labels.config
+++ b/target/executable/convert/from_h5mu_to_seurat/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt
+  // uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ] 
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The above labels intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/velocyto_to_h5mu/.config.vsh.yaml
+++ b/target/executable/convert/velocyto_to_h5mu/.config.vsh.yaml
@@ -0,0 +1,322 @@
+name: "velocyto_to_h5mu"
+namespace: "convert"
+version: "main"
+authors:
+- name: "Dries Schaumont"
+  roles:
+  - "maintainer"
+  - "author"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "dries@data-intuitive.com"
+      github: "DriesSchaumont"
+      orcid: "0000-0002-4389-0440"
+      linkedin: "dries-schaumont"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Scientist"
+- name: "Robrecht Cannoodt"
+  roles:
+  - "author"
+  info:
+    role: "Core Team Member"
+    links:
+      email: "robrecht@data-intuitive.com"
+      github: "rcannood"
+      orcid: "0000-0003-3641-729X"
+      linkedin: "robrechtcannoodt"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Data Science Engineer"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+- name: "Angela Oliveira Pisco"
+  roles:
+  - "contributor"
+  info:
+    role: "Contributor"
+    links:
+      github: "aopisco"
+      orcid: "0000-0003-0142-2355"
+      linkedin: "aopisco"
+    organizations:
+    - name: "Insitro"
+      href: "https://insitro.com"
+      role: "Director of Computational Biology"
+    - name: "Open Problems"
+      href: "https://openproblems.bio"
+      role: "Core Member"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input_loom"
+    description: "Path to the input loom file."
+    info: null
+    example:
+    - "input.loom"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--input_h5mu"
+    description: "If a MuData file is provided, the velocity AnnData object is added to it instead of being written to a new MuData file."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "The name of the modality to operate on."
+    info: null
+    default:
+    - "rna_velocity"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    description: "Path to the output MuData file."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer_spliced"
+    description: "Output layer for the spliced reads."
+    info: null
+    default:
+    - "velo_spliced"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer_unspliced"
+    description: "Output layer for the unspliced reads."
+    info: null
+    default:
+    - "velo_unspliced"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer_ambiguous"
+    description: "Output layer for the ambiguous reads."
+    info: null
+    default:
+    - "velo_ambiguous"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Convert a velocyto loom file to a h5mu file.\n\nIf an input h5mu file\
+  \ is also provided, the velocity\nh5ad object will get added to that h5mu instead.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "cellranger_tiny_fastq"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "lowmem"
+    - "lowcpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "anndata~=0.11.1"
+    - "mudata~=0.3.1"
+    - "scanpy~=1.10.4"
+    - "loompy"
+    script:
+    - "exec(\"try:\\n  import awkward\\nexcept ModuleNotFoundError:\\n  exit(0)\\\
+      nelse:  exit(1)\")"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/velocity/velocyto_to_h5mu/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/convert/velocyto_to_h5mu"
+  executable: "target/executable/convert/velocyto_to_h5mu/velocyto_to_h5mu"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/convert/velocyto_to_h5mu/nextflow_labels.config
+++ b/target/executable/convert/velocyto_to_h5mu/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing; memory scales with the attempt number
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry when the exit status is in the range 137..140 (codes that have something to do with memory issues); otherwise terminate
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ]
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: if resourceLimits.memory is set and task.attempt has reached task.maxRetries, request that limit; otherwise use the base amount multiplied by task.attempt
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes through process.disk when it is set, and null otherwise
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The disk labels above intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/convert/velocyto_to_h5mu/velocyto_to_h5mu
+++ b/target/executable/convert/velocyto_to_h5mu/velocyto_to_h5mu
--- a/target/executable/correction/cellbender_remove_background/.config.vsh.yaml
+++ b/target/executable/correction/cellbender_remove_background/.config.vsh.yaml
@@ -0,0 +1,660 @@
+name: "cellbender_remove_background"
+namespace: "correction"
+version: "main"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    alternatives:
+    - "-i"
+    description: "Input h5mu file. Data file on which to run tool. Data must be un-filtered:\
+      \ it should include empty droplets."
+    info: null
+    example:
+    - "input.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--modality"
+    description: "Which modality to process."
+    info: null
+    default:
+    - "rna"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Outputs"
+  arguments:
+  - type: "file"
+    name: "--output"
+    alternatives:
+    - "-o"
+    description: "Full count matrix as an h5mu file, with background RNA removed.\
+      \ This file contains all the original droplet barcodes."
+    info: null
+    example:
+    - "output.h5mu"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--layer_output"
+    description: "Output layer"
+    info: null
+    default:
+    - "cellbender_corrected"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_background_fraction"
+    info: null
+    default:
+    - "cellbender_background_fraction"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_cell_probability"
+    info: null
+    default:
+    - "cellbender_cell_probability"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_cell_size"
+    info: null
+    default:
+    - "cellbender_cell_size"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_droplet_efficiency"
+    description: "Name of the column in the .obs dataframe to store the droplet efficiencies\
+      \ in.\n"
+    info: null
+    default:
+    - "cellbender_droplet_efficiency"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obs_latent_scale"
+    info: null
+    default:
+    - "cellbender_latent_scale"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--var_ambient_expression"
+    info: null
+    default:
+    - "cellbender_ambient_expression"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--obsm_gene_expression_encoding"
+    info: null
+    default:
+    - "cellbender_gene_expression_encoding"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output_compression"
+    description: "Compression format to use for the output AnnData and/or MuData objects.\n\
+      By default no compression is applied.\n"
+    info: null
+    example:
+    - "gzip"
+    required: false
+    choices:
+    - "gzip"
+    - "lzf"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Arguments"
+  arguments:
+  - type: "boolean"
+    name: "--expected_cells_from_qc"
+    description: "Will use the Cell Ranger QC to determine the estimated number of\
+      \ cells"
+    info: null
+    default:
+    - false
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--expected_cells"
+    description: "Number of cells expected in the dataset (a rough estimate within\
+      \ a factor of 2 is sufficient)."
+    info: null
+    example:
+    - 1000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--total_droplets_included"
+    description: "The number of droplets from the rank-ordered UMI plot\nthat will\
+      \ have their cell probabilities inferred as an\noutput. Include the droplets\
+      \ which might contain cells.\nDroplets beyond TOTAL_DROPLETS_INCLUDED should\
+      \ be\n'surely empty' droplets.\n"
+    info: null
+    example:
+    - 25000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--force_cell_umi_prior"
+    description: "Ignore CellBender's heuristic prior estimation, and use this prior\
+      \ for UMI counts in cells."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--force_empty_umi_prior"
+    description: "Ignore CellBender's heuristic prior estimation, and use this prior\
+      \ for UMI counts in empty droplets."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--model"
+    description: "Which model is being used for count data.\n\n* 'naive' subtracts\
+      \ the estimated ambient profile.\n* 'simple' does not model either ambient RNA\
+      \ or random barcode swapping (for debugging purposes -- not recommended).\n\
+      * 'ambient' assumes background RNA is incorporated into droplets.\n* 'swapping'\
+      \ assumes background RNA comes from random barcode swapping (via PCR chimeras).\n\
+      * 'full' uses a combined ambient and swapping model.\n"
+    info: null
+    default:
+    - "full"
+    required: false
+    choices:
+    - "naive"
+    - "simple"
+    - "ambient"
+    - "swapping"
+    - "full"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--epochs"
+    description: "Number of epochs to train."
+    info: null
+    default:
+    - 150
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--low_count_threshold"
+    description: "Droplets with UMI counts below this number are completely \nexcluded\
+      \ from the analysis. This can help identify the correct \nprior for empty droplet\
+      \ counts in the rare case where empty \ncounts are extremely high (over 200).\n"
+    info: null
+    default:
+    - 5
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--z_dim"
+    description: "Dimension of latent variable z.\n"
+    info: null
+    default:
+    - 64
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--z_layers"
+    description: "Dimension of hidden layers in the encoder for z.\n"
+    info: null
+    default:
+    - 512
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "double"
+    name: "--training_fraction"
+    description: "Training detail: the fraction of the data used for training.\nThe\
+      \ rest is never seen by the inference algorithm. Speeds up learning.\n"
+    info: null
+    default:
+    - 0.9
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--empty_drop_training_fraction"
+    description: "Training detail: the fraction of the training data each epoch that\
+      \ \nis drawn (randomly sampled) from surely empty droplets.\n"
+    info: null
+    default:
+    - 0.2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--ignore_features"
+    description: "Integer indices of features to ignore entirely. In the output\n\
+      count matrix, the counts for these features will be unchanged.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "double"
+    name: "--fpr"
+    description: "Target 'delta' false positive rate in [0, 1). Use 0 for a cohort\n\
+      of samples which will be jointly analyzed for differential expression.\nA false\
+      \ positive is a true signal count that is erroneously removed.\nMore background\
+      \ removal is accompanied by more signal removal at\nhigh values of FPR. You\
+      \ can specify multiple values, which will\ncreate multiple output files.\n"
+    info: null
+    default:
+    - 0.01
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--exclude_feature_types"
+    description: "Feature types to ignore during the analysis. These features will\n\
+      be left unchanged in the output file.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "double"
+    name: "--projected_ambient_count_threshold"
+    description: "Controls how many features are included in the analysis, which\n\
+      can lead to a large speedup. If a feature is expected to have less\nthan PROJECTED_AMBIENT_COUNT_THRESHOLD\
+      \ counts total in all cells\n(summed), then that gene is excluded, and it will\
+      \ be unchanged\nin the output count matrix. For example, \nPROJECTED_AMBIENT_COUNT_THRESHOLD\
+      \ = 0 will include all features\nwhich have even a single count in any empty\
+      \ droplet.\n"
+    info: null
+    default:
+    - 0.1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--learning_rate"
+    description: "Training detail: lower learning rate for inference.\nA OneCycle\
+      \ learning rate schedule is used, where the\nupper learning rate is ten times\
+      \ this value. (For this\nvalue, probably do not exceed 1e-3).\n"
+    info: null
+    default:
+    - 1.0E-4
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--final_elbo_fail_fraction"
+    description: "Training is considered to have failed if \n(best_test_ELBO - final_test_ELBO)/(best_test_ELBO\
+      \ - initial_test_ELBO) > FINAL_ELBO_FAIL_FRACTION.\nTraining will automatically\
+      \ re-run if --num-training-tries > 1.\nBy default, will not fail training based\
+      \ on final_training_ELBO.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--epoch_elbo_fail_fraction"
+    description: "Training is considered to have failed if \n(previous_epoch_test_ELBO\
+      \ - current_epoch_test_ELBO)/(previous_epoch_test_ELBO - initial_train_ELBO)\
+      \ > EPOCH_ELBO_FAIL_FRACTION.\nTraining will automatically re-run if --num-training-tries\
+      \ > 1.\nBy default, will not fail training based on epoch_training_ELBO.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--num_training_tries"
+    description: "Number of times to attempt to train the model. At each subsequent\
+      \ attempt,\nthe learning rate is multiplied by LEARNING_RATE_RETRY_MULT.\n"
+    info: null
+    default:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--learning_rate_retry_mult"
+    description: "Learning rate is multiplied by this amount each time a new training\n\
+      attempt is made. (This parameter is only used if training fails based\non EPOCH_ELBO_FAIL_FRACTION\
+      \ or FINAL_ELBO_FAIL_FRACTION and\nNUM_TRAINING_TRIES is > 1.) \n"
+    info: null
+    default:
+    - 0.2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--posterior_batch_size"
+    description: "Training detail: size of batches when creating the posterior.\n\
+      Reduce this to avoid running out of GPU memory creating the posterior\n(will\
+      \ be slower).\n"
+    info: null
+    default:
+    - 128
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--posterior_regulation"
+    description: "Posterior regularization method. (For experts: not required for\
+      \ normal usage,\nsee documentation). \n\n* PRq is approximate quantile-targeting.\n\
+      * PRmu is approximate mean-targeting aggregated over genes (behavior of v0.2.0).\n\
+      * PRmu_gene is approximate mean-targeting per gene.\n"
+    info: null
+    required: false
+    choices:
+    - "PRq"
+    - "PRmu"
+    - "PRmu_gene"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--alpha"
+    description: "Tunable parameter alpha for the PRq posterior regularization method\n\
+      (not normally used: see documentation).\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--q"
+    description: "Tunable parameter q for the CDF threshold estimation method (not\n\
+      normally used: see documentation).\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--estimator"
+    description: "Output denoised count estimation method. (For experts: not required\n\
+      for normal usage, see documentation).\n"
+    info: null
+    default:
+    - "mckp"
+    required: false
+    choices:
+    - "map"
+    - "mean"
+    - "cdf"
+    - "sample"
+    - "mckp"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--estimator_multiple_cpu"
+    description: "Including the flag --estimator-multiple-cpu will use more than one\n\
+      CPU to compute the MCKP output count estimator in parallel (does nothing\nfor\
+      \ other estimators).\n"
+    info: null
+    direction: "input"
+  - type: "boolean"
+    name: "--constant_learning_rate"
+    description: "Including the flag --constant-learning-rate will use the ClippedAdam\n\
+      optimizer instead of the OneCycleLR learning rate schedule, which is\nthe default.\
+      \ Learning is faster with the OneCycleLR schedule.\nHowever, training can easily\
+      \ be continued from a checkpoint for more\nepochs than the initial command specified\
+      \ when using ClippedAdam. On\nthe other hand, if using the OneCycleLR schedule\
+      \ with 150 epochs\nspecified, it is not possible to pick up from that final\
+      \ checkpoint\nand continue training until 250 epochs.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--debug"
+    description: "Including the flag --debug will log extra messages useful for debugging.\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--cuda"
+    description: "Including the flag --cuda will run the inference on a\nGPU.\n"
+    info: null
+    direction: "input"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "setup_logger.py"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Eliminating technical artifacts from high-throughput single-cell RNA\
+  \ sequencing data.\n\nThis module removes counts due to ambient RNA molecules and\
+  \ random barcode swapping from (raw) UMI-based scRNA-seq count matrices. \nAt the\
+  \ moment, only the count matrices produced by the CellRanger count pipeline are supported.\
+  \ Support for additional tools and protocols \nwill be added in the future. A quick\
+  \ start tutorial can be found here.\n\nFleming et al. 2022, bioRxiv.\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+license: "MIT"
+links:
+  repository: "https://github.com/openpipelines-bio/openpipeline"
+  docker_registry: "ghcr.io"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    label:
+    - "midcpu"
+    - "midmem"
+    - "gpu"
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    run:
+    - "apt update && DEBIAN_FRONTEND=noninteractive apt install -y make build-essential\
+      \ libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget ca-certificates\
+      \ curl llvm libncurses5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev\
+      \ liblzma-dev mecab-ipadic-utf8 git \\\n&& curl https://pyenv.run | bash \\\n\
+      && pyenv update \\\n&& pyenv install $PYTHON_VERSION \\\n&& pyenv global $PYTHON_VERSION\
+      \ \\\n&& apt-get clean\n"
+    env:
+    - "PYENV_ROOT=\"/root/.pyenv\""
+    - "PATH=\"$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH\""
+    - "PYTHON_VERSION=3.7.16"
+  - type: "python"
+    user: false
+    packages:
+    - "lxml~=4.8.0"
+    - "mudata~=0.2.1"
+    - "cellbender~=0.3.0"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/correction/cellbender_remove_background/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/correction/cellbender_remove_background"
+  executable: "target/executable/correction/cellbender_remove_background/cellbender_remove_background"
+  viash_version: "0.9.4"
+  git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
+  git_remote: "https://github.com/openpipelines-bio/openpipeline"
+  git_tag: "0.2.0-2055-g173327cc"
+package_config:
+  name: "openpipeline"
+  version: "main"
+  summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
+  description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
+    \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
+    \nIn terms of workflows, the following has been made available, but keep in mind\
+    \ that\nindividual tools and functionality can be executed as standalone components\
+    \ as well.\n\n  * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
+    \  * Ingestion: Read mapping and generating a count matrix.\n  * Single sample\
+    \ processing: cell filtering and doublet detection.\n  * Multisample processing:\
+    \ Count transformation, normalization, QC metric calculations.\n  * Integration:\
+    \ Clustering, integration and batch correction using single and multimodal methods.\n\
+    \  * Downstream analysis workflows\n"
+  info:
+    test_resources:
+    - type: "s3"
+      path: "s3://openpipelines-data"
+      dest: "resources_test"
+    nextflow_labels_ci:
+    - path: "src/workflows/utils/labels_ci.config"
+      description: "Adds the correct memory and CPU labels when running on the Viash\
+        \ Hub CI."
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
+    .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
+    )'\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "single-cell"
+  - "multimodal"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/openpipelines-bio/openpipeline"
+    docker_registry: "ghcr.io"
+    homepage: "https://openpipelines.bio"
+    documentation: "https://openpipelines.bio/fundamentals"
+    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/correction/cellbender_remove_background/cellbender_remove_background
+++ b/target/executable/correction/cellbender_remove_background/cellbender_remove_background
--- a/target/executable/correction/cellbender_remove_background/nextflow_labels.config
+++ b/target/executable/correction/cellbender_remove_background/nextflow_labels.config
@@ -0,0 +1,48 @@
+process {
+  // Default resources for components that hardly do any processing; memory scales with the attempt number
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry when the exit status is in the range 137..140 (codes that have something to do with memory issues); otherwise terminate
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+
+  // The memory a task is assigned increases with each attempt.
+  // Uncomment the line below and adjust the value to set a global upper limit on the memory.
+  // resourceLimits = [ memory: 240.Gb ]
+
+  // CPU resources
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory resources: if resourceLimits.memory is set and task.attempt has reached task.maxRetries, request that limit; otherwise use the base amount multiplied by task.attempt
+  withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
+  withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
+  withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
+  withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
+
+  // Disk space: each label passes through process.disk when it is set, and null otherwise
+  withLabel: lowdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: middisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: highdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  withLabel: veryhighdisk {
+    disk = {process.disk ? process.disk : null}
+  }
+  
+  // NOTE: The disk labels above intentionally do not have an effect by default.
+  // The user should set the disk space requirements by adding the following
+  // to the compute environment:
+  //
+  // withLabel: lowdisk { disk = { 20.GB * task.attempt } }
+  // withLabel: middisk { disk = { 100.GB * task.attempt } }
+  // withLabel: highdisk { disk = { 200.GB * task.attempt } }
+  // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
+}
--- a/target/executable/correction/cellbender_remove_background/setup_logger.py
+++ b/target/executable/correction/cellbender_remove_background/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():  # Configure the root logger to emit INFO-and-above records to stdout, and return it.
+    import logging  # imported locally, so the module needs no top-level imports
+    from sys import stdout
+
+    logger = logging.getLogger()  # no name given: this is the root logger
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)  # write to stdout instead of the default stderr
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)  # NOTE: every call attaches a new handler, so calling this twice duplicates each log line
+
+    return logger
--- a/Show More
+++ b/Show More