Build branch fix-integration-tests with version fix-integration-tests (c1fe2a62)

Build pipeline: vsh-ci-dev-rdkgl Source commit: c1fe2a629d Source message: Update CHANGELOG
2024-10-18 08:08:08 +00:00
parent cd0af18851
commit 43cfb251c7
1457 changed files with 222 additions and 905786 deletions
--- a/target/executable/dataflow/concatenate_h5mu/.config.vsh.yaml
+++ b/target/executable/dataflow/concatenate_h5mu/.config.vsh.yaml
@@ -1,306 +0,0 @@
-name: "concatenate_h5mu"
-namespace: "dataflow"
-version: "dev"
-authors:
- name: "Dries Schaumont"
-  roles:
-  - "maintainer"
-  info:
-    role: "Core Team Member"
-    links:
-      email: "dries@data-intuitive.com"
-      github: "DriesSchaumont"
-      orcid: "0000-0002-4389-0440"
-      linkedin: "dries-schaumont"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Data Scientist"
-argument_groups:
- name: "Arguments"
-  arguments:
-  - type: "file"
-    name: "--input"
-    alternatives:
-    - "-i"
-    description: "Paths to the different samples to be concatenated."
-    info: null
-    example:
-    - "sample_paths"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "input"
-    multiple: true
-    multiple_sep: ";"
-  - type: "string"
-    name: "--input_id"
-    description: "Names of the different samples that have to be concatenated.  Must\
-      \ be specified when using '--mode move'.\nIn this case, the ids will be used\
-      \ for the columns names of the dataframes registring the conflicts.\nIf specified,\
-      \ must be of same length as `--input`.\n"
-    info: null
-    required: false
-    direction: "input"
-    multiple: true
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output"
-    alternatives:
-    - "-o"
-    info: null
-    example:
-    - "output.h5mu"
-    must_exist: true
-    create_parent: true
-    required: false
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--output_compression"
-    description: "The compression format to be used on the output h5mu object."
-    info: null
-    example:
-    - "gzip"
-    required: false
-    choices:
-    - "gzip"
-    - "lzf"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--obs_sample_name"
-    description: "Name of the .obs key under which to add the sample names."
-    info: null
-    default:
-    - "sample_id"
-    required: false
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--other_axis_mode"
-    description: "How to handle the merging of other axis (var, obs, ...).\n\n  -\
-      \ None: keep no data\n  - same: only keep elements of the matrices which are\
-      \ the same in each of the samples\n  - unique: only keep elements for which\
-      \ there is only 1 possible value (1 value that can occur in multiple samples)\n\
-      \  - first: keep the annotation from the first sample\n  - only: keep elements\
-      \ that show up in only one of the objects (1 unique element in only 1 sample)\n\
-      \  - move: identical to 'same', but moving the conflicting values to .varm or\
-      \ .obsm\n"
-    info: null
-    default:
-    - "move"
-    required: false
-    choices:
-    - "same"
-    - "unique"
-    - "first"
-    - "only"
-    - "concat"
-    - "move"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--uns_merge_mode"
-    description: "How to handle the merging of .uns across modalities\n  - None: keep\
-      \ no data\n  - same: only keep elements of the matrices which are the same in\
-      \ each of the samples\n  - unique: only keep elements for which there is only\
-      \ 1 possible value (1 value that can occur in multiple samples)\n  - first:\
-      \ keep the annotation from the first sample\n  - only: keep elements that show\
-      \ up in only one of the objects (1 unique element in only 1 sample)\n  - make_unique:\
-      \ identical to 'unique', but keys which are not unique are made unique by prefixing\
-      \ them with the sample id.\n"
-    info: null
-    default:
-    - "make_unique"
-    required: false
-    choices:
-    - "same"
-    - "unique"
-    - "first"
-    - "only"
-    - "make_unique"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-resources:
- type: "python_script"
-  path: "script.py"
-  is_executable: true
- type: "file"
-  path: "setup_logger.py"
- type: "file"
-  path: "nextflow_labels.config"
-  dest: "nextflow_labels.config"
-description: "Concatenate observations from samples in several (uni- and/or multi-modal)\
-  \ MuData files into a single file.\n"
-test_resources:
- type: "python_script"
-  path: "test.py"
-  is_executable: true
- type: "file"
-  path: "e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
- type: "file"
-  path: "human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"
- type: "file"
-  path: "openpipelinetestutils"
-  dest: "openpipelinetestutils"
-info: null
-status: "enabled"
-links:
-  repository: "https://github.com/openpipelines-bio/openpipeline"
-  docker_registry: "ghcr.io"
-runners:
- type: "executable"
-  id: "executable"
-  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
-  id: "nextflow"
-  directives:
-    label:
-    - "midcpu"
-    - "highmem"
-    tag: "$id"
-  auto:
-    simplifyInput: true
-    simplifyOutput: false
-    transcript: false
-    publish: false
-  config:
-    labels:
-      mem1gb: "memory = 1000000000.B"
-      mem2gb: "memory = 2000000000.B"
-      mem5gb: "memory = 5000000000.B"
-      mem10gb: "memory = 10000000000.B"
-      mem20gb: "memory = 20000000000.B"
-      mem50gb: "memory = 50000000000.B"
-      mem100gb: "memory = 100000000000.B"
-      mem200gb: "memory = 200000000000.B"
-      mem500gb: "memory = 500000000000.B"
-      mem1tb: "memory = 1000000000000.B"
-      mem2tb: "memory = 2000000000000.B"
-      mem5tb: "memory = 5000000000000.B"
-      mem10tb: "memory = 10000000000000.B"
-      mem20tb: "memory = 20000000000000.B"
-      mem50tb: "memory = 50000000000000.B"
-      mem100tb: "memory = 100000000000000.B"
-      mem200tb: "memory = 200000000000000.B"
-      mem500tb: "memory = 500000000000000.B"
-      mem1gib: "memory = 1073741824.B"
-      mem2gib: "memory = 2147483648.B"
-      mem4gib: "memory = 4294967296.B"
-      mem8gib: "memory = 8589934592.B"
-      mem16gib: "memory = 17179869184.B"
-      mem32gib: "memory = 34359738368.B"
-      mem64gib: "memory = 68719476736.B"
-      mem128gib: "memory = 137438953472.B"
-      mem256gib: "memory = 274877906944.B"
-      mem512gib: "memory = 549755813888.B"
-      mem1tib: "memory = 1099511627776.B"
-      mem2tib: "memory = 2199023255552.B"
-      mem4tib: "memory = 4398046511104.B"
-      mem8tib: "memory = 8796093022208.B"
-      mem16tib: "memory = 17592186044416.B"
-      mem32tib: "memory = 35184372088832.B"
-      mem64tib: "memory = 70368744177664.B"
-      mem128tib: "memory = 140737488355328.B"
-      mem256tib: "memory = 281474976710656.B"
-      mem512tib: "memory = 562949953421312.B"
-      cpu1: "cpus = 1"
-      cpu2: "cpus = 2"
-      cpu5: "cpus = 5"
-      cpu10: "cpus = 10"
-      cpu20: "cpus = 20"
-      cpu50: "cpus = 50"
-      cpu100: "cpus = 100"
-      cpu200: "cpus = 200"
-      cpu500: "cpus = 500"
-      cpu1000: "cpus = 1000"
-    script:
-    - "includeConfig(\"nextflow_labels.config\")"
-  debug: false
-  container: "docker"
-engines:
- type: "docker"
-  id: "docker"
-  image: "python:3.11-slim"
-  target_registry: "images.viash-hub.com"
-  target_tag: "dev"
-  namespace_separator: "/"
-  setup:
-  - type: "apt"
-    packages:
-    - "procps"
-    interactive: false
-  - type: "python"
-    user: false
-    packages:
-    - "anndata==0.10.8"
-    - "mudata~=0.2.4"
-    - "pandas!=2.1.2"
-    - "numpy<2.0.0"
-    - "pandas~=2.1.1"
-    upgrade: true
-  test_setup:
-  - type: "docker"
-    copy:
-    - "openpipelinetestutils /opt/openpipelinetestutils"
-  - type: "python"
-    user: false
-    packages:
-    - "/opt/openpipelinetestutils"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  entrypoint: []
-  cmd: null
- type: "native"
-  id: "native"
-build_info:
-  config: "src/dataflow/concatenate_h5mu/config.vsh.yaml"
-  runner: "executable"
-  engine: "docker|native"
-  output: "target/executable/dataflow/concatenate_h5mu"
-  executable: "target/executable/dataflow/concatenate_h5mu/concatenate_h5mu"
-  viash_version: "0.9.0"
-  git_commit: "2dbe3b7231f9abb4baa628e76e8abc686e627087"
-  git_remote: "https://x-access-token:ghs_NVsRTpmVPn6SfFQ131njHQOgn6tt7b1bmmJj@github.com/openpipelines-bio/openpipeline"
-  git_tag: "0.2.0-1926-g2dbe3b72"
-package_config:
-  name: "openpipeline"
-  version: "dev"
-  info:
-    test_resources:
-    - type: "s3"
-      path: "s3://openpipelines-data"
-      dest: "resources_test"
-  viash_version: "0.9.0"
-  source: "src"
-  target: "target"
-  config_mods:
-  - ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
-    .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
-    .runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
-    \ := 'includeConfig(\"nextflow_labels.config\")'"
-  - ".engines += { type: \"native\" }"
-  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
-  - ".engines[.type == 'docker'].target_tag := 'dev'"
-  organization: "vsh"
-  links:
-    repository: "https://github.com/openpipelines-bio/openpipeline"
-    docker_registry: "ghcr.io"
-    homepage: "https://openpipelines.bio"
-    documentation: "https://openpipelines.bio/fundamentals"
-    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/dataflow/concatenate_h5mu/concatenate_h5mu
+++ b/target/executable/dataflow/concatenate_h5mu/concatenate_h5mu
--- a/target/executable/dataflow/concatenate_h5mu/nextflow_labels.config
+++ b/target/executable/dataflow/concatenate_h5mu/nextflow_labels.config
@@ -1,42 +0,0 @@
-process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
-
-  // Retry for exit codes that have something to do with memory issues
-  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-  maxRetries = 3
-  maxMemory = null
-
-  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
-  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
-}
-
-def get_memory(to_compare) {
-  if (!process.containsKey("maxMemory") || !process.maxMemory) {
-    return to_compare
-  }
-
-  try {
-    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
-      return process.maxMemory
-    }
-    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
-      return max_memory as nextflow.util.MemoryUnit
-    }
-    else {
-      return to_compare
-    }  
-  } catch (all) {
-        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
-        System.exit(1)
-  }
-}
--- a/target/executable/dataflow/concatenate_h5mu/setup_logger.py
+++ b/target/executable/dataflow/concatenate_h5mu/setup_logger.py
@@ -1,12 +0,0 @@
-def setup_logger():
-    import logging
-    from sys import stdout
-
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-    console_handler = logging.StreamHandler(stdout)
-    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
-    console_handler.setFormatter(logFormatter)
-    logger.addHandler(console_handler)
-
-    return logger
--- a/target/executable/dataflow/merge/.config.vsh.yaml
+++ b/target/executable/dataflow/merge/.config.vsh.yaml
@@ -1,234 +0,0 @@
-name: "merge"
-namespace: "dataflow"
-version: "dev"
-authors:
- name: "Dries Schaumont"
-  roles:
-  - "maintainer"
-  info:
-    role: "Core Team Member"
-    links:
-      email: "dries@data-intuitive.com"
-      github: "DriesSchaumont"
-      orcid: "0000-0002-4389-0440"
-      linkedin: "dries-schaumont"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Data Scientist"
-argument_groups:
- name: "Arguments"
-  arguments:
-  - type: "file"
-    name: "--input"
-    alternatives:
-    - "-i"
-    description: "Paths to the single-modality .h5mu files that need to be combined"
-    info: null
-    default:
-    - "sample_paths"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "input"
-    multiple: true
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output"
-    alternatives:
-    - "-o"
-    description: "Path to the output file."
-    info: null
-    default:
-    - "output.h5mu"
-    must_exist: true
-    create_parent: true
-    required: false
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--output_compression"
-    description: "The compression format to be used on the output h5mu object."
-    info: null
-    example:
-    - "gzip"
-    required: false
-    choices:
-    - "gzip"
-    - "lzf"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-resources:
- type: "python_script"
-  path: "script.py"
-  is_executable: true
- type: "file"
-  path: "setup_logger.py"
- type: "file"
-  path: "nextflow_labels.config"
-  dest: "nextflow_labels.config"
-description: "Combine one or more single-modality .h5mu files together into one .h5mu\
-  \ file.\n"
-test_resources:
- type: "python_script"
-  path: "test.py"
-  is_executable: true
- type: "file"
-  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5mu"
- type: "file"
-  path: "pbmc_1k_protein_v3_filtered_feature_bc_matrix_prot.h5mu"
- type: "file"
-  path: "openpipelinetestutils"
-  dest: "openpipelinetestutils"
-info: null
-status: "enabled"
-links:
-  repository: "https://github.com/openpipelines-bio/openpipeline"
-  docker_registry: "ghcr.io"
-runners:
- type: "executable"
-  id: "executable"
-  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
-  id: "nextflow"
-  directives:
-    label:
-    - "singlecpu"
-    - "highmem"
-    tag: "$id"
-  auto:
-    simplifyInput: true
-    simplifyOutput: false
-    transcript: false
-    publish: false
-  config:
-    labels:
-      mem1gb: "memory = 1000000000.B"
-      mem2gb: "memory = 2000000000.B"
-      mem5gb: "memory = 5000000000.B"
-      mem10gb: "memory = 10000000000.B"
-      mem20gb: "memory = 20000000000.B"
-      mem50gb: "memory = 50000000000.B"
-      mem100gb: "memory = 100000000000.B"
-      mem200gb: "memory = 200000000000.B"
-      mem500gb: "memory = 500000000000.B"
-      mem1tb: "memory = 1000000000000.B"
-      mem2tb: "memory = 2000000000000.B"
-      mem5tb: "memory = 5000000000000.B"
-      mem10tb: "memory = 10000000000000.B"
-      mem20tb: "memory = 20000000000000.B"
-      mem50tb: "memory = 50000000000000.B"
-      mem100tb: "memory = 100000000000000.B"
-      mem200tb: "memory = 200000000000000.B"
-      mem500tb: "memory = 500000000000000.B"
-      mem1gib: "memory = 1073741824.B"
-      mem2gib: "memory = 2147483648.B"
-      mem4gib: "memory = 4294967296.B"
-      mem8gib: "memory = 8589934592.B"
-      mem16gib: "memory = 17179869184.B"
-      mem32gib: "memory = 34359738368.B"
-      mem64gib: "memory = 68719476736.B"
-      mem128gib: "memory = 137438953472.B"
-      mem256gib: "memory = 274877906944.B"
-      mem512gib: "memory = 549755813888.B"
-      mem1tib: "memory = 1099511627776.B"
-      mem2tib: "memory = 2199023255552.B"
-      mem4tib: "memory = 4398046511104.B"
-      mem8tib: "memory = 8796093022208.B"
-      mem16tib: "memory = 17592186044416.B"
-      mem32tib: "memory = 35184372088832.B"
-      mem64tib: "memory = 70368744177664.B"
-      mem128tib: "memory = 140737488355328.B"
-      mem256tib: "memory = 281474976710656.B"
-      mem512tib: "memory = 562949953421312.B"
-      cpu1: "cpus = 1"
-      cpu2: "cpus = 2"
-      cpu5: "cpus = 5"
-      cpu10: "cpus = 10"
-      cpu20: "cpus = 20"
-      cpu50: "cpus = 50"
-      cpu100: "cpus = 100"
-      cpu200: "cpus = 200"
-      cpu500: "cpus = 500"
-      cpu1000: "cpus = 1000"
-    script:
-    - "includeConfig(\"nextflow_labels.config\")"
-  debug: false
-  container: "docker"
-engines:
- type: "docker"
-  id: "docker"
-  image: "python:3.10-slim"
-  target_registry: "images.viash-hub.com"
-  target_tag: "dev"
-  namespace_separator: "/"
-  setup:
-  - type: "apt"
-    packages:
-    - "procps"
-    interactive: false
-  - type: "python"
-    user: false
-    packages:
-    - "anndata==0.10.8"
-    - "mudata~=0.2.4"
-    - "pandas!=2.1.2"
-    - "numpy<2.0.0"
-    - "pandas~=2.0.0"
-    upgrade: true
-  test_setup:
-  - type: "docker"
-    copy:
-    - "openpipelinetestutils /opt/openpipelinetestutils"
-  - type: "python"
-    user: false
-    packages:
-    - "/opt/openpipelinetestutils"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  entrypoint: []
-  cmd: null
- type: "native"
-  id: "native"
-build_info:
-  config: "src/dataflow/merge/config.vsh.yml"
-  runner: "executable"
-  engine: "docker|native"
-  output: "target/executable/dataflow/merge"
-  executable: "target/executable/dataflow/merge/merge"
-  viash_version: "0.9.0"
-  git_commit: "2dbe3b7231f9abb4baa628e76e8abc686e627087"
-  git_remote: "https://x-access-token:ghs_NVsRTpmVPn6SfFQ131njHQOgn6tt7b1bmmJj@github.com/openpipelines-bio/openpipeline"
-  git_tag: "0.2.0-1926-g2dbe3b72"
-package_config:
-  name: "openpipeline"
-  version: "dev"
-  info:
-    test_resources:
-    - type: "s3"
-      path: "s3://openpipelines-data"
-      dest: "resources_test"
-  viash_version: "0.9.0"
-  source: "src"
-  target: "target"
-  config_mods:
-  - ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
-    .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
-    .runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
-    \ := 'includeConfig(\"nextflow_labels.config\")'"
-  - ".engines += { type: \"native\" }"
-  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
-  - ".engines[.type == 'docker'].target_tag := 'dev'"
-  organization: "vsh"
-  links:
-    repository: "https://github.com/openpipelines-bio/openpipeline"
-    docker_registry: "ghcr.io"
-    homepage: "https://openpipelines.bio"
-    documentation: "https://openpipelines.bio/fundamentals"
-    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/dataflow/merge/merge
+++ b/target/executable/dataflow/merge/merge
--- a/target/executable/dataflow/merge/nextflow_labels.config
+++ b/target/executable/dataflow/merge/nextflow_labels.config
@@ -1,42 +0,0 @@
-process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
-
-  // Retry for exit codes that have something to do with memory issues
-  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-  maxRetries = 3
-  maxMemory = null
-
-  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
-  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
-}
-
-def get_memory(to_compare) {
-  if (!process.containsKey("maxMemory") || !process.maxMemory) {
-    return to_compare
-  }
-
-  try {
-    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
-      return process.maxMemory
-    }
-    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
-      return max_memory as nextflow.util.MemoryUnit
-    }
-    else {
-      return to_compare
-    }  
-  } catch (all) {
-        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
-        System.exit(1)
-  }
-}
--- a/target/executable/dataflow/merge/setup_logger.py
+++ b/target/executable/dataflow/merge/setup_logger.py
@@ -1,12 +0,0 @@
-def setup_logger():
-    import logging
-    from sys import stdout
-
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-    console_handler = logging.StreamHandler(stdout)
-    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
-    console_handler.setFormatter(logFormatter)
-    logger.addHandler(console_handler)
-
-    return logger
--- a/target/executable/dataflow/split_h5mu/.config.vsh.yaml
+++ b/target/executable/dataflow/split_h5mu/.config.vsh.yaml
@@ -1,268 +0,0 @@
-name: "split_h5mu"
-namespace: "dataflow"
-version: "dev"
-authors:
- name: "Dorien Roosen"
-  roles:
-  - "author"
-  - "maintainer"
-  info:
-    role: "Core Team Member"
-    links:
-      email: "dorien@data-intuitive.com"
-      github: "dorien-er"
-      linkedin: "dorien-roosen"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Data Scientist"
-argument_groups:
- name: "Input & specifications"
-  arguments:
-  - type: "file"
-    name: "--input"
-    description: "Path to a single .h5mu file."
-    info: null
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--modality"
-    info: null
-    default:
-    - "rna"
-    required: false
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--obs_feature"
-    description: "The .obs column to split the mudata on."
-    info: null
-    example:
-    - "celltype"
-    required: true
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "boolean_true"
-    name: "--drop_obs_nan"
-    description: "Whether to drop all .obs columns that contain only nan values after\
-      \ splitting."
-    info: null
-    direction: "input"
-  - type: "boolean_true"
-    name: "--ensure_unique_filenames"
-    description: "Append number suffixes to ensure unique filenames after sanitizing\
-      \ obs feature values."
-    info: null
-    direction: "input"
- name: "Outputs"
-  arguments:
-  - type: "file"
-    name: "--output"
-    description: "Output directory containing multiple h5mu files."
-    info: null
-    example:
-    - "/path/to/output"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--output_compression"
-    description: "The compression format to be used on the output h5mu object."
-    info: null
-    example:
-    - "gzip"
-    required: false
-    choices:
-    - "gzip"
-    - "lzf"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output_files"
-    description: "A csv containing the base filename and obs feature by which it was\
-      \ split."
-    info: null
-    example:
-    - "sample_files.csv"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-resources:
- type: "python_script"
-  path: "script.py"
-  is_executable: true
- type: "file"
-  path: "nextflow_labels.config"
-  dest: "nextflow_labels.config"
-description: "Split the samples of a single modality from a .h5mu (multimodal) sample\
-  \ into seperate .h5mu files based on the values of an .obs column of this modality.\
-  \ \n"
-test_resources:
- type: "python_script"
-  path: "test.py"
-  is_executable: true
- type: "file"
-  path: "openpipelinetestutils"
-  dest: "openpipelinetestutils"
-info: null
-status: "enabled"
-links:
-  repository: "https://github.com/openpipelines-bio/openpipeline"
-  docker_registry: "ghcr.io"
-runners:
- type: "executable"
-  id: "executable"
-  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
-  id: "nextflow"
-  directives:
-    label:
-    - "singlecpu"
-    - "lowmem"
-    tag: "$id"
-  auto:
-    simplifyInput: true
-    simplifyOutput: false
-    transcript: false
-    publish: false
-  config:
-    labels:
-      mem1gb: "memory = 1000000000.B"
-      mem2gb: "memory = 2000000000.B"
-      mem5gb: "memory = 5000000000.B"
-      mem10gb: "memory = 10000000000.B"
-      mem20gb: "memory = 20000000000.B"
-      mem50gb: "memory = 50000000000.B"
-      mem100gb: "memory = 100000000000.B"
-      mem200gb: "memory = 200000000000.B"
-      mem500gb: "memory = 500000000000.B"
-      mem1tb: "memory = 1000000000000.B"
-      mem2tb: "memory = 2000000000000.B"
-      mem5tb: "memory = 5000000000000.B"
-      mem10tb: "memory = 10000000000000.B"
-      mem20tb: "memory = 20000000000000.B"
-      mem50tb: "memory = 50000000000000.B"
-      mem100tb: "memory = 100000000000000.B"
-      mem200tb: "memory = 200000000000000.B"
-      mem500tb: "memory = 500000000000000.B"
-      mem1gib: "memory = 1073741824.B"
-      mem2gib: "memory = 2147483648.B"
-      mem4gib: "memory = 4294967296.B"
-      mem8gib: "memory = 8589934592.B"
-      mem16gib: "memory = 17179869184.B"
-      mem32gib: "memory = 34359738368.B"
-      mem64gib: "memory = 68719476736.B"
-      mem128gib: "memory = 137438953472.B"
-      mem256gib: "memory = 274877906944.B"
-      mem512gib: "memory = 549755813888.B"
-      mem1tib: "memory = 1099511627776.B"
-      mem2tib: "memory = 2199023255552.B"
-      mem4tib: "memory = 4398046511104.B"
-      mem8tib: "memory = 8796093022208.B"
-      mem16tib: "memory = 17592186044416.B"
-      mem32tib: "memory = 35184372088832.B"
-      mem64tib: "memory = 70368744177664.B"
-      mem128tib: "memory = 140737488355328.B"
-      mem256tib: "memory = 281474976710656.B"
-      mem512tib: "memory = 562949953421312.B"
-      cpu1: "cpus = 1"
-      cpu2: "cpus = 2"
-      cpu5: "cpus = 5"
-      cpu10: "cpus = 10"
-      cpu20: "cpus = 20"
-      cpu50: "cpus = 50"
-      cpu100: "cpus = 100"
-      cpu200: "cpus = 200"
-      cpu500: "cpus = 500"
-      cpu1000: "cpus = 1000"
-    script:
-    - "includeConfig(\"nextflow_labels.config\")"
-  debug: false
-  container: "docker"
-engines:
- type: "docker"
-  id: "docker"
-  image: "python:3.12-slim"
-  target_registry: "images.viash-hub.com"
-  target_tag: "dev"
-  namespace_separator: "/"
-  setup:
-  - type: "apt"
-    packages:
-    - "procps"
-    interactive: false
-  - type: "python"
-    user: false
-    packages:
-    - "anndata==0.10.8"
-    - "mudata~=0.2.4"
-    - "pandas!=2.1.2"
-    - "numpy<2.0.0"
-    upgrade: true
-  test_setup:
-  - type: "docker"
-    copy:
-    - "openpipelinetestutils /opt/openpipelinetestutils"
-  - type: "python"
-    user: false
-    packages:
-    - "/opt/openpipelinetestutils"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  entrypoint: []
-  cmd: null
- type: "native"
-  id: "native"
-build_info:
-  config: "src/dataflow/split_h5mu/config.vsh.yaml"
-  runner: "executable"
-  engine: "docker|native"
-  output: "target/executable/dataflow/split_h5mu"
-  executable: "target/executable/dataflow/split_h5mu/split_h5mu"
-  viash_version: "0.9.0"
-  git_commit: "2dbe3b7231f9abb4baa628e76e8abc686e627087"
-  git_remote: "https://x-access-token:ghs_NVsRTpmVPn6SfFQ131njHQOgn6tt7b1bmmJj@github.com/openpipelines-bio/openpipeline"
-  git_tag: "0.2.0-1926-g2dbe3b72"
-package_config:
-  name: "openpipeline"
-  version: "dev"
-  info:
-    test_resources:
-    - type: "s3"
-      path: "s3://openpipelines-data"
-      dest: "resources_test"
-  viash_version: "0.9.0"
-  source: "src"
-  target: "target"
-  config_mods:
-  - ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
-    .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
-    .runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
-    \ := 'includeConfig(\"nextflow_labels.config\")'"
-  - ".engines += { type: \"native\" }"
-  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
-  - ".engines[.type == 'docker'].target_tag := 'dev'"
-  organization: "vsh"
-  links:
-    repository: "https://github.com/openpipelines-bio/openpipeline"
-    docker_registry: "ghcr.io"
-    homepage: "https://openpipelines.bio"
-    documentation: "https://openpipelines.bio/fundamentals"
-    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/dataflow/split_h5mu/nextflow_labels.config
+++ b/target/executable/dataflow/split_h5mu/nextflow_labels.config
@@ -1,42 +0,0 @@
-process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
-
-  // Retry for exit codes that have something to do with memory issues
-  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-  maxRetries = 3
-  maxMemory = null
-
-  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
-  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
-}
-
-def get_memory(to_compare) {
-  if (!process.containsKey("maxMemory") || !process.maxMemory) {
-    return to_compare
-  }
-
-  try {
-    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
-      return process.maxMemory
-    }
-    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
-      return max_memory as nextflow.util.MemoryUnit
-    }
-    else {
-      return to_compare
-    }  
-  } catch (all) {
-        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
-        System.exit(1)
-  }
-}
--- a/target/executable/dataflow/split_h5mu/split_h5mu
+++ b/target/executable/dataflow/split_h5mu/split_h5mu
--- a/target/executable/dataflow/split_h5mu_train_test/.config.vsh.yaml
+++ b/target/executable/dataflow/split_h5mu_train_test/.config.vsh.yaml
@@ -1,309 +0,0 @@
-name: "split_h5mu_train_test"
-namespace: "dataflow"
-version: "dev"
-authors:
- name: "Jakub Majercik"
-  roles:
-  - "author"
-  info:
-    role: "Contributor"
-    links:
-      email: "jakub@data-intuitive.com"
-      github: "jakubmajercik"
-      linkedin: "jakubmajercik"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Bioinformatics Engineer"
-argument_groups:
- name: "Inputs"
-  description: "Input dataset in mudata format."
-  arguments:
-  - type: "file"
-    name: "--input"
-    description: "The input (query) data to be labeled. Should be a .h5mu file."
-    info: null
-    example:
-    - "input.h5mu"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--modality"
-    description: "Which modality to process."
-    info: null
-    default:
-    - "rna"
-    required: false
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
- name: "Outputs"
-  description: "Output arguments."
-  arguments:
-  - type: "file"
-    name: "--output_train"
-    description: "The output training data in mudata format."
-    info: null
-    example:
-    - "output_train.h5mu"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output_test"
-    description: "The output testing data in mudata format."
-    info: null
-    example:
-    - "output_test.h5mu"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output_val"
-    description: "The output validation data in mudata format."
-    info: null
-    example:
-    - "output_val.h5mu"
-    must_exist: true
-    create_parent: true
-    required: false
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--compression"
-    info: null
-    example:
-    - "gzip"
-    required: false
-    choices:
-    - "gzip"
-    - "lzf"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
- name: "Split arguments"
-  description: "Model arguments."
-  arguments:
-  - type: "double"
-    name: "--test_size"
-    description: "The proportion of the dataset to include in the test split."
-    info: null
-    default:
-    - 0.2
-    required: false
-    min: 0.0
-    max: 1.0
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "double"
-    name: "--val_size"
-    description: "The proportion of the dataset to include in the validation split."
-    info: null
-    required: false
-    min: 0.0
-    max: 1.0
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "boolean_true"
-    name: "--shuffle"
-    description: "Whether or not to shuffle the data before splitting."
-    info: null
-    direction: "input"
-  - type: "integer"
-    name: "--random_state"
-    description: "The seed used by the random number generator."
-    info: null
-    required: false
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-resources:
- type: "python_script"
-  path: "script.py"
-  is_executable: true
- type: "file"
-  path: "setup_logger.py"
- type: "file"
-  path: "nextflow_labels.config"
-  dest: "nextflow_labels.config"
-description: "Split mudata object into training and testing (and validation) datasets\
-  \ based on observations into separate mudata objects."
-test_resources:
- type: "python_script"
-  path: "test.py"
-  is_executable: true
- type: "file"
-  path: "pbmc_1k_protein_v3_mms.h5mu"
- type: "file"
-  path: "openpipelinetestutils"
-  dest: "openpipelinetestutils"
-info: null
-status: "enabled"
-links:
-  repository: "https://github.com/openpipelines-bio/openpipeline"
-  docker_registry: "ghcr.io"
-runners:
- type: "executable"
-  id: "executable"
-  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
-  id: "nextflow"
-  directives:
-    tag: "$id"
-  auto:
-    simplifyInput: true
-    simplifyOutput: false
-    transcript: false
-    publish: false
-  config:
-    labels:
-      mem1gb: "memory = 1000000000.B"
-      mem2gb: "memory = 2000000000.B"
-      mem5gb: "memory = 5000000000.B"
-      mem10gb: "memory = 10000000000.B"
-      mem20gb: "memory = 20000000000.B"
-      mem50gb: "memory = 50000000000.B"
-      mem100gb: "memory = 100000000000.B"
-      mem200gb: "memory = 200000000000.B"
-      mem500gb: "memory = 500000000000.B"
-      mem1tb: "memory = 1000000000000.B"
-      mem2tb: "memory = 2000000000000.B"
-      mem5tb: "memory = 5000000000000.B"
-      mem10tb: "memory = 10000000000000.B"
-      mem20tb: "memory = 20000000000000.B"
-      mem50tb: "memory = 50000000000000.B"
-      mem100tb: "memory = 100000000000000.B"
-      mem200tb: "memory = 200000000000000.B"
-      mem500tb: "memory = 500000000000000.B"
-      mem1gib: "memory = 1073741824.B"
-      mem2gib: "memory = 2147483648.B"
-      mem4gib: "memory = 4294967296.B"
-      mem8gib: "memory = 8589934592.B"
-      mem16gib: "memory = 17179869184.B"
-      mem32gib: "memory = 34359738368.B"
-      mem64gib: "memory = 68719476736.B"
-      mem128gib: "memory = 137438953472.B"
-      mem256gib: "memory = 274877906944.B"
-      mem512gib: "memory = 549755813888.B"
-      mem1tib: "memory = 1099511627776.B"
-      mem2tib: "memory = 2199023255552.B"
-      mem4tib: "memory = 4398046511104.B"
-      mem8tib: "memory = 8796093022208.B"
-      mem16tib: "memory = 17592186044416.B"
-      mem32tib: "memory = 35184372088832.B"
-      mem64tib: "memory = 70368744177664.B"
-      mem128tib: "memory = 140737488355328.B"
-      mem256tib: "memory = 281474976710656.B"
-      mem512tib: "memory = 562949953421312.B"
-      cpu1: "cpus = 1"
-      cpu2: "cpus = 2"
-      cpu5: "cpus = 5"
-      cpu10: "cpus = 10"
-      cpu20: "cpus = 20"
-      cpu50: "cpus = 50"
-      cpu100: "cpus = 100"
-      cpu200: "cpus = 200"
-      cpu500: "cpus = 500"
-      cpu1000: "cpus = 1000"
-    script:
-    - "includeConfig(\"nextflow_labels.config\")"
-  debug: false
-  container: "docker"
-engines:
- type: "docker"
-  id: "docker"
-  image: "python:3.10-slim"
-  target_registry: "images.viash-hub.com"
-  target_tag: "dev"
-  namespace_separator: "/"
-  setup:
-  - type: "apt"
-    packages:
-    - "libhdf5-dev"
-    - "procps"
-    interactive: false
-  - type: "python"
-    user: false
-    packages:
-    - "scanpy~=1.9.6"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "scikit-learn==1.4.2"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "anndata==0.10.8"
-    - "mudata~=0.2.4"
-    - "pandas!=2.1.2"
-    - "numpy<2.0.0"
-    upgrade: true
-  test_setup:
-  - type: "docker"
-    copy:
-    - "openpipelinetestutils /opt/openpipelinetestutils"
-  - type: "python"
-    user: false
-    packages:
-    - "/opt/openpipelinetestutils"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  entrypoint: []
-  cmd: null
- type: "native"
-  id: "native"
-build_info:
-  config: "src/dataflow/split_h5mu_train_test/config.vsh.yaml"
-  runner: "executable"
-  engine: "docker|native"
-  output: "target/executable/dataflow/split_h5mu_train_test"
-  executable: "target/executable/dataflow/split_h5mu_train_test/split_h5mu_train_test"
-  viash_version: "0.9.0"
-  git_commit: "2dbe3b7231f9abb4baa628e76e8abc686e627087"
-  git_remote: "https://x-access-token:<redacted>@github.com/openpipelines-bio/openpipeline"
-  git_tag: "0.2.0-1926-g2dbe3b72"
-package_config:
-  name: "openpipeline"
-  version: "dev"
-  info:
-    test_resources:
-    - type: "s3"
-      path: "s3://openpipelines-data"
-      dest: "resources_test"
-  viash_version: "0.9.0"
-  source: "src"
-  target: "target"
-  config_mods:
-  - ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
-    .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
-    .runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
-    \ := 'includeConfig(\"nextflow_labels.config\")'"
-  - ".engines += { type: \"native\" }"
-  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
-  - ".engines[.type == 'docker'].target_tag := 'dev'"
-  organization: "vsh"
-  links:
-    repository: "https://github.com/openpipelines-bio/openpipeline"
-    docker_registry: "ghcr.io"
-    homepage: "https://openpipelines.bio"
-    documentation: "https://openpipelines.bio/fundamentals"
-    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/dataflow/split_h5mu_train_test/nextflow_labels.config
+++ b/target/executable/dataflow/split_h5mu_train_test/nextflow_labels.config
@@ -1,42 +0,0 @@
-process {
-  // Baseline for lightweight components: a single CPU, and 2 GB of memory
-  // multiplied by the attempt number.
-  cpus = 1
-  memory = { 2.GB * task.attempt }
-
-  // Exit statuses 137 through 140 are retried; any other failure terminates.
-  maxRetries = 3
-  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-
-  // Optional memory ceiling consulted by get_memory(); null leaves requests uncapped.
-  maxMemory = null
-
-  // CPU labels
-  withLabel: singlecpu {
-    cpus = 1
-  }
-  withLabel: lowcpu {
-    cpus = 4
-  }
-  withLabel: midcpu {
-    cpus = 10
-  }
-  withLabel: highcpu {
-    cpus = 20
-  }
-
-  // Memory labels: a base amount times the attempt number, passed through get_memory()
-  withLabel: lowmem {
-    memory = { get_memory( 4.GB * task.attempt ) }
-  }
-  withLabel: midmem {
-    memory = { get_memory( 25.GB * task.attempt ) }
-  }
-  withLabel: highmem {
-    memory = { get_memory( 50.GB * task.attempt ) }
-  }
-  withLabel: veryhighmem {
-    memory = { get_memory( 75.GB * task.attempt ) }
-  }
-}
-
-/**
- * Caps a requested memory amount at the optional `process.maxMemory` ceiling.
- *
- * @param to_compare the requested amount; it is compared against the ceiling
- *        converted to a nextflow.util.MemoryUnit (the label closures in this
- *        file pass values such as `4.GB * task.attempt`)
- * @return `to_compare` when no ceiling is configured or the request does not
- *         exceed it; otherwise the ceiling. On the attempt whose number equals
- *         `process.maxRetries` the ceiling is returned regardless of the request.
- */
-def get_memory(to_compare) {
-  // No ceiling configured (key missing, null, or otherwise falsy): nothing to cap.
-  if (!process.containsKey("maxMemory") || !process.maxMemory) {
-    return to_compare
-  }
-
-  try {
-    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
-      return process.maxMemory
-    }
-    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
-      // Request exceeds the ceiling: clamp to it. This branch used to return
-      // `max_memory`, a name that is not defined anywhere in this file, so an
-      // over-limit request ended up in the catch block below and aborted the run.
-      return process.maxMemory as nextflow.util.MemoryUnit
-    }
-    else {
-      return to_compare
-    }
-  } catch (all) {
-        // Any failure while converting or comparing the configured values is fatal.
-        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
-        System.exit(1)
-  }
-}
--- a/target/executable/dataflow/split_h5mu_train_test/setup_logger.py
+++ b/target/executable/dataflow/split_h5mu_train_test/setup_logger.py
@@ -1,12 +0,0 @@
-def setup_logger():
-    """Configure the root logger to emit INFO-and-above records on stdout.
-
-    The root logger's level is set to INFO and a stream handler bound to the
-    current ``sys.stdout`` is attached, formatting each record as
-    ``<asctime> <levelname padded to 8> <message>``.
-
-    The call is idempotent: when the root logger already carries a stream
-    handler writing to the same stdout object, no further handler is added.
-    Previously every call attached another handler, so calling this twice
-    printed every log line twice.
-
-    Returns:
-        logging.Logger: the root logger.
-    """
-    import logging
-    from sys import stdout
-
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-
-    # Look for a handler that already targets this exact stdout object.
-    already_attached = any(
-        isinstance(handler, logging.StreamHandler)
-        and getattr(handler, "stream", None) is stdout
-        for handler in logger.handlers
-    )
-    if not already_attached:
-        console_handler = logging.StreamHandler(stdout)
-        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
-        console_handler.setFormatter(logFormatter)
-        logger.addHandler(console_handler)
-
-    return logger
--- a/target/executable/dataflow/split_h5mu_train_test/split_h5mu_train_test
+++ b/target/executable/dataflow/split_h5mu_train_test/split_h5mu_train_test
--- a/target/executable/dataflow/split_modalities/.config.vsh.yaml
+++ b/target/executable/dataflow/split_modalities/.config.vsh.yaml
@@ -1,259 +0,0 @@
-name: "split_modalities"
-namespace: "dataflow"
-version: "dev"
-authors:
- name: "Dries Schaumont"
-  roles:
-  - "maintainer"
-  info:
-    role: "Core Team Member"
-    links:
-      email: "dries@data-intuitive.com"
-      github: "DriesSchaumont"
-      orcid: "0000-0002-4389-0440"
-      linkedin: "dries-schaumont"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Data Scientist"
- name: "Robrecht Cannoodt"
-  roles:
-  - "contributor"
-  info:
-    role: "Core Team Member"
-    links:
-      email: "robrecht@data-intuitive.com"
-      github: "rcannood"
-      orcid: "0000-0003-3641-729X"
-      linkedin: "robrechtcannoodt"
-    organizations:
-    - name: "Data Intuitive"
-      href: "https://www.data-intuitive.com"
-      role: "Data Science Engineer"
-    - name: "Open Problems"
-      href: "https://openproblems.bio"
-      role: "Core Member"
-argument_groups:
- name: "Arguments"
-  arguments:
-  - type: "file"
-    name: "--input"
-    alternatives:
-    - "-i"
-    description: "Path to a single .h5mu file."
-    info: null
-    default:
-    - "sample_path"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output"
-    alternatives:
-    - "-o"
-    description: "Output directory containing multiple h5mu files."
-    info: null
-    example:
-    - "/path/to/output"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-  - type: "string"
-    name: "--output_compression"
-    description: "The compression format to be used on the output h5mu object."
-    info: null
-    example:
-    - "gzip"
-    required: false
-    choices:
-    - "gzip"
-    - "lzf"
-    direction: "input"
-    multiple: false
-    multiple_sep: ";"
-  - type: "file"
-    name: "--output_types"
-    description: "A csv containing the base filename and modality type per output\
-      \ file."
-    info: null
-    example:
-    - "types.csv"
-    must_exist: true
-    create_parent: true
-    required: true
-    direction: "output"
-    multiple: false
-    multiple_sep: ";"
-resources:
- type: "python_script"
-  path: "script.py"
-  is_executable: true
- type: "file"
-  path: "setup_logger.py"
- type: "file"
-  path: "nextflow_labels.config"
-  dest: "nextflow_labels.config"
-description: "Split the modalities from a single .h5mu multimodal sample into separate\
-  \ .h5mu files. \n"
-test_resources:
- type: "python_script"
-  path: "test.py"
-  is_executable: true
- type: "file"
-  path: "openpipelinetestutils"
-  dest: "openpipelinetestutils"
-info: null
-status: "enabled"
-links:
-  repository: "https://github.com/openpipelines-bio/openpipeline"
-  docker_registry: "ghcr.io"
-runners:
- type: "executable"
-  id: "executable"
-  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
-  id: "nextflow"
-  directives:
-    label:
-    - "singlecpu"
-    - "lowmem"
-    tag: "$id"
-  auto:
-    simplifyInput: true
-    simplifyOutput: false
-    transcript: false
-    publish: false
-  config:
-    labels:
-      mem1gb: "memory = 1000000000.B"
-      mem2gb: "memory = 2000000000.B"
-      mem5gb: "memory = 5000000000.B"
-      mem10gb: "memory = 10000000000.B"
-      mem20gb: "memory = 20000000000.B"
-      mem50gb: "memory = 50000000000.B"
-      mem100gb: "memory = 100000000000.B"
-      mem200gb: "memory = 200000000000.B"
-      mem500gb: "memory = 500000000000.B"
-      mem1tb: "memory = 1000000000000.B"
-      mem2tb: "memory = 2000000000000.B"
-      mem5tb: "memory = 5000000000000.B"
-      mem10tb: "memory = 10000000000000.B"
-      mem20tb: "memory = 20000000000000.B"
-      mem50tb: "memory = 50000000000000.B"
-      mem100tb: "memory = 100000000000000.B"
-      mem200tb: "memory = 200000000000000.B"
-      mem500tb: "memory = 500000000000000.B"
-      mem1gib: "memory = 1073741824.B"
-      mem2gib: "memory = 2147483648.B"
-      mem4gib: "memory = 4294967296.B"
-      mem8gib: "memory = 8589934592.B"
-      mem16gib: "memory = 17179869184.B"
-      mem32gib: "memory = 34359738368.B"
-      mem64gib: "memory = 68719476736.B"
-      mem128gib: "memory = 137438953472.B"
-      mem256gib: "memory = 274877906944.B"
-      mem512gib: "memory = 549755813888.B"
-      mem1tib: "memory = 1099511627776.B"
-      mem2tib: "memory = 2199023255552.B"
-      mem4tib: "memory = 4398046511104.B"
-      mem8tib: "memory = 8796093022208.B"
-      mem16tib: "memory = 17592186044416.B"
-      mem32tib: "memory = 35184372088832.B"
-      mem64tib: "memory = 70368744177664.B"
-      mem128tib: "memory = 140737488355328.B"
-      mem256tib: "memory = 281474976710656.B"
-      mem512tib: "memory = 562949953421312.B"
-      cpu1: "cpus = 1"
-      cpu2: "cpus = 2"
-      cpu5: "cpus = 5"
-      cpu10: "cpus = 10"
-      cpu20: "cpus = 20"
-      cpu50: "cpus = 50"
-      cpu100: "cpus = 100"
-      cpu200: "cpus = 200"
-      cpu500: "cpus = 500"
-      cpu1000: "cpus = 1000"
-    script:
-    - "includeConfig(\"nextflow_labels.config\")"
-  debug: false
-  container: "docker"
-engines:
- type: "docker"
-  id: "docker"
-  image: "python:3.12-slim"
-  target_registry: "images.viash-hub.com"
-  target_tag: "dev"
-  namespace_separator: "/"
-  setup:
-  - type: "apt"
-    packages:
-    - "procps"
-    interactive: false
-  - type: "python"
-    user: false
-    packages:
-    - "anndata==0.10.8"
-    - "mudata~=0.2.4"
-    - "pandas!=2.1.2"
-    - "numpy<2.0.0"
-    upgrade: true
-  test_setup:
-  - type: "docker"
-    copy:
-    - "openpipelinetestutils /opt/openpipelinetestutils"
-  - type: "python"
-    user: false
-    packages:
-    - "/opt/openpipelinetestutils"
-    upgrade: true
-  - type: "python"
-    user: false
-    packages:
-    - "viashpy==0.8.0"
-    upgrade: true
-  entrypoint: []
-  cmd: null
- type: "native"
-  id: "native"
-build_info:
-  config: "src/dataflow/split_modalities/config.vsh.yaml"
-  runner: "executable"
-  engine: "docker|native"
-  output: "target/executable/dataflow/split_modalities"
-  executable: "target/executable/dataflow/split_modalities/split_modalities"
-  viash_version: "0.9.0"
-  git_commit: "2dbe3b7231f9abb4baa628e76e8abc686e627087"
-  git_remote: "https://x-access-token:<redacted>@github.com/openpipelines-bio/openpipeline"
-  git_tag: "0.2.0-1926-g2dbe3b72"
-package_config:
-  name: "openpipeline"
-  version: "dev"
-  info:
-    test_resources:
-    - type: "s3"
-      path: "s3://openpipelines-data"
-      dest: "resources_test"
-  viash_version: "0.9.0"
-  source: "src"
-  target: "target"
-  config_mods:
-  - ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
-    .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
-    .runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
-    \ := 'includeConfig(\"nextflow_labels.config\")'"
-  - ".engines += { type: \"native\" }"
-  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
-  - ".engines[.type == 'docker'].target_tag := 'dev'"
-  organization: "vsh"
-  links:
-    repository: "https://github.com/openpipelines-bio/openpipeline"
-    docker_registry: "ghcr.io"
-    homepage: "https://openpipelines.bio"
-    documentation: "https://openpipelines.bio/fundamentals"
-    issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
--- a/target/executable/dataflow/split_modalities/nextflow_labels.config
+++ b/target/executable/dataflow/split_modalities/nextflow_labels.config
@@ -1,42 +0,0 @@
-process {
-  // Baseline for lightweight components: a single CPU, and 2 GB of memory
-  // multiplied by the attempt number.
-  cpus = 1
-  memory = { 2.GB * task.attempt }
-
-  // Exit statuses 137 through 140 are retried; any other failure terminates.
-  maxRetries = 3
-  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-
-  // Optional memory ceiling consulted by get_memory(); null leaves requests uncapped.
-  maxMemory = null
-
-  // CPU labels
-  withLabel: singlecpu {
-    cpus = 1
-  }
-  withLabel: lowcpu {
-    cpus = 4
-  }
-  withLabel: midcpu {
-    cpus = 10
-  }
-  withLabel: highcpu {
-    cpus = 20
-  }
-
-  // Memory labels: a base amount times the attempt number, passed through get_memory()
-  withLabel: lowmem {
-    memory = { get_memory( 4.GB * task.attempt ) }
-  }
-  withLabel: midmem {
-    memory = { get_memory( 25.GB * task.attempt ) }
-  }
-  withLabel: highmem {
-    memory = { get_memory( 50.GB * task.attempt ) }
-  }
-  withLabel: veryhighmem {
-    memory = { get_memory( 75.GB * task.attempt ) }
-  }
-}
-
-/**
- * Caps a requested memory amount at the optional `process.maxMemory` ceiling.
- *
- * @param to_compare the requested amount; it is compared against the ceiling
- *        converted to a nextflow.util.MemoryUnit (the label closures in this
- *        file pass values such as `4.GB * task.attempt`)
- * @return `to_compare` when no ceiling is configured or the request does not
- *         exceed it; otherwise the ceiling. On the attempt whose number equals
- *         `process.maxRetries` the ceiling is returned regardless of the request.
- */
-def get_memory(to_compare) {
-  // No ceiling configured (key missing, null, or otherwise falsy): nothing to cap.
-  if (!process.containsKey("maxMemory") || !process.maxMemory) {
-    return to_compare
-  }
-
-  try {
-    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
-      return process.maxMemory
-    }
-    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
-      // Request exceeds the ceiling: clamp to it. This branch used to return
-      // `max_memory`, a name that is not defined anywhere in this file, so an
-      // over-limit request ended up in the catch block below and aborted the run.
-      return process.maxMemory as nextflow.util.MemoryUnit
-    }
-    else {
-      return to_compare
-    }
-  } catch (all) {
-        // Any failure while converting or comparing the configured values is fatal.
-        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
-        System.exit(1)
-  }
-}
--- a/target/executable/dataflow/split_modalities/setup_logger.py
+++ b/target/executable/dataflow/split_modalities/setup_logger.py
@@ -1,12 +0,0 @@
-def setup_logger():
-    """Configure the root logger to emit INFO-and-above records on stdout.
-
-    The root logger's level is set to INFO and a stream handler bound to the
-    current ``sys.stdout`` is attached, formatting each record as
-    ``<asctime> <levelname padded to 8> <message>``.
-
-    The call is idempotent: when the root logger already carries a stream
-    handler writing to the same stdout object, no further handler is added.
-    Previously every call attached another handler, so calling this twice
-    printed every log line twice.
-
-    Returns:
-        logging.Logger: the root logger.
-    """
-    import logging
-    from sys import stdout
-
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-
-    # Look for a handler that already targets this exact stdout object.
-    already_attached = any(
-        isinstance(handler, logging.StreamHandler)
-        and getattr(handler, "stream", None) is stdout
-        for handler in logger.handlers
-    )
-    if not already_attached:
-        console_handler = logging.StreamHandler(stdout)
-        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
-        console_handler.setFormatter(logFormatter)
-        logger.addHandler(console_handler)
-
-    return logger
--- a/target/executable/dataflow/split_modalities/split_modalities
+++ b/target/executable/dataflow/split_modalities/split_modalities