Build branch main with version main (65dd41d)

Build pipeline: viash-hub.htrnaseq.main-vhms8 Source commit: 65dd41d8b1 Source message: Optimize spawning of processes
2024-11-05 17:26:35 +00:00
parent b8abf8c490
commit f2ff92c6ac
127 changed files with 29560 additions and 1004 deletions
--- a/target/executable/eset/create_eset/.config.vsh.yaml
+++ b/target/executable/eset/create_eset/.config.vsh.yaml
@@ -0,0 +1,228 @@
+name: "create_eset"
+namespace: "eset"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--pDataFile"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--fDataFile"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--mappingDir"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--poolName"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    info: null
+    default:
+    - "eset.$id.rds"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+test_resources:
+- type: "r_script"
+  path: "test.R"
+  is_executable: true
+- type: "file"
+  path: "pData.tsv"
+- type: "file"
+  path: "fData.tsv"
+- type: "file"
+  path: "mapping_dir"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "r-base:4.3.0"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "libcurl4-openssl-dev"
+    - "libssl-dev"
+    - "libxml2-dev"
+    - "libfftw3-dev"
+    - "libfontconfig1-dev"
+    - "libfreetype-dev"
+    - "libhdf5-dev"
+    - "bzip2"
+    - "libharfbuzz-dev"
+    - "libfribidi-dev"
+    - "libtiff-dev"
+    - "libgsl-dev"
+    - "libcairo-dev"
+    - "libudunits2-dev"
+    - "procps"
+    interactive: false
+  - type: "r"
+    cran:
+    - "nlcv"
+    bioc:
+    - "Biobase"
+    - "limma"
+    - "a4Core"
+    - "MLInterfaces"
+    - "multtest"
+    script:
+    - "remotes::install_url(\"https://cran.r-project.org/src/contrib/Archive/Matrix/Matrix_1.6-5.tar.gz\"\
+      , dependencies=TRUE, upgrade_dependencies=FALSE);\\\nremotes::install_url(\"\
+      https://cran.r-project.org/src/contrib/Archive/Seurat/Seurat_4.4.0.tar.gz\"\
+      , repos=BiocManager::repositories(), dependencies=TRUE, upgrade_dependencies=FALSE)\\\
+      \n"
+    bioc_force_install: false
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:  # provenance of this build: source config, runner/engine, output paths and source commit
+  config: "src/eset/create_eset/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/eset/create_eset"
+  executable: "target/executable/eset/create_eset/create_eset"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://github.com/viash-hub/htrnaseq"  # credentials stripped: never commit a remote URL that embeds an access token
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/eset/create_eset/create_eset
+++ b/target/executable/eset/create_eset/create_eset
--- a/target/executable/eset/create_eset/nextflow_labels.config
+++ b/target/executable/eset/create_eset/nextflow_labels.config
@@ -0,0 +1,105 @@
+executor {  // executor-level settings
+  $k8s {  // settings in this sub-block apply to the k8s executor only
+    submitRateLimit = '10sec'  // job submission rate limit (Nextflow documents '10sec' as 10 submissions per second — confirm)
+    pollInterval = '1 sec'  // interval between task status polls
+  }
+}
+
+process {
+  container = 'nextflow/bash:latest'  // default container for any process that does not set its own
+  
+  // Default resources; the memory request grows linearly with the retry attempt number
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36
+
+  // Retry when the exit status is in 137..140 (treated as memory-related); anything else uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3  // also read by get_memory() to detect the final attempt
+  maxMemory = 192.GB  // ceiling that get_memory() compares label memory requests against
+
+  // Resource labels: cpu labels set fixed counts; mem labels scale with task.attempt and go through get_memory()
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
+  
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
+
+}
+
+profiles {
+  // Resolve the temp dir: first non-empty of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, falling back to '/tmp'
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {  // profile: point the docker/podman/charliecloud temp setting at tempDir
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {  // profile: turn publishDir off for every process (name pattern '.*')
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {  // profile: enable Docker and explicitly disable the other container engines
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // Profile for local processing: lower maxMemory ceiling and smaller cpu/memory label values.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Clamp a memory request to `process.maxMemory`: returns `to_compare` unchanged when no cap is
+// configured or the request is within it, and the cap when the request exceeds it or on the last retry.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare
+  }
+  try {
+    // Final allowed attempt: ask for the full cap regardless of the computed request.
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory
+    }
+    // Fix: this branch used to return `max_memory`, a name defined nowhere in this file,
+    // instead of the configured cap. `> 0` replaces `== 1` so any positive compareTo result counts.
+    def cap = process.maxMemory as nextflow.util.MemoryUnit
+    return to_compare.compareTo(cap) > 0 ? cap : to_compare
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}
--- a/target/executable/eset/create_fdata/.config.vsh.yaml
+++ b/target/executable/eset/create_fdata/.config.vsh.yaml
@@ -0,0 +1,185 @@
+name: "create_fdata"
+namespace: "eset"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--gtf"
+    description: "Genome annotation file in GTF format."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    description: "Tab-delimited text file containing information about the 'gene'\
+      \ or 'transcript'\nentries from the input GTF file. The 'transcript' entries\
+      \ are used in case the source\nof the GTF was 'refGene' or 'ncbiRefSeq'. \n"
+    info: null
+    default:
+    - "fData.$id.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "create_fdata.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Create a fdata file\n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "test_annotation.gtf"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:  # provenance of this build: source config, runner/engine, output paths and source commit
+  config: "src/eset/create_fdata/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/eset/create_fdata"
+  executable: "target/executable/eset/create_fdata/create_fdata"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://github.com/viash-hub/htrnaseq"  # credentials stripped: never commit a remote URL that embeds an access token
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/eset/create_fdata/create_fdata
+++ b/target/executable/eset/create_fdata/create_fdata
--- a/target/executable/eset/create_fdata/nextflow_labels.config
+++ b/target/executable/eset/create_fdata/nextflow_labels.config
@@ -0,0 +1,105 @@
+executor {  // executor-level settings
+  $k8s {  // settings in this sub-block apply to the k8s executor only
+    submitRateLimit = '10sec'  // job submission rate limit (Nextflow documents '10sec' as 10 submissions per second — confirm)
+    pollInterval = '1 sec'  // interval between task status polls
+  }
+}
+
+process {
+  container = 'nextflow/bash:latest'  // default container for any process that does not set its own
+  
+  // Default resources; the memory request grows linearly with the retry attempt number
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36
+
+  // Retry when the exit status is in 137..140 (treated as memory-related); anything else uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3  // also read by get_memory() to detect the final attempt
+  maxMemory = 192.GB  // ceiling that get_memory() compares label memory requests against
+
+  // Resource labels: cpu labels set fixed counts; mem labels scale with task.attempt and go through get_memory()
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
+  
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
+
+}
+
+profiles {
+  // Resolve the temp dir: first non-empty of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, falling back to '/tmp'
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {  // profile: point the docker/podman/charliecloud temp setting at tempDir
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {  // profile: turn publishDir off for every process (name pattern '.*')
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {  // profile: enable Docker and explicitly disable the other container engines
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // Profile for local processing: lower maxMemory ceiling and smaller cpu/memory label values.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Clamp a memory request to `process.maxMemory`: returns `to_compare` unchanged when no cap is
+// configured or the request is within it, and the cap when the request exceeds it or on the last retry.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare
+  }
+  try {
+    // Final allowed attempt: ask for the full cap regardless of the computed request.
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory
+    }
+    // Fix: this branch used to return `max_memory`, a name defined nowhere in this file,
+    // instead of the configured cap. `> 0` replaces `== 1` so any positive compareTo result counts.
+    def cap = process.maxMemory as nextflow.util.MemoryUnit
+    return to_compare.compareTo(cap) > 0 ? cap : to_compare
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}
--- a/target/executable/eset/create_pdata/.config.vsh.yaml
+++ b/target/executable/eset/create_pdata/.config.vsh.yaml
@@ -0,0 +1,199 @@
+name: "create_pdata"
+namespace: "eset"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--star_stats_file"
+    description: "Tab-delimited text file containing statistics (per column) that\
+      \ were generated\nfrom the STAR log files (Log.final.out, Summary.csv, ReadsPerGene.out.tab).\n\
+      Each entry (row) in the file describes the values for one well (barcode).\n"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChromPool"
+    description: "Pivot table in tsv format of the combined nrReadsNrGenesPerChrom\
+      \ files from STAR. \nDescribes per chromosome (as columns) the number of reads,\
+      \ as well as the total number \nof reads per cell barcode and the percentage\
+      \ of nuclear, ERCC and mitochondrial\nreads.\n"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    info: null
+    default:
+    - "pData.$id.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "create_pdata.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Create a pdata file by combining the mapping statistics \n"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "nrReadsNrGenesPerChromPool.txt"
+- type: "file"
+  path: "starLogs.txt"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.12-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:  # provenance of this build: source config, runner/engine, output paths and source commit
+  config: "src/eset/create_pdata/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/eset/create_pdata"
+  executable: "target/executable/eset/create_pdata/create_pdata"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://github.com/viash-hub/htrnaseq"  # credentials stripped: never commit a remote URL that embeds an access token
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/eset/create_pdata/create_pdata
+++ b/target/executable/eset/create_pdata/create_pdata
--- a/target/executable/eset/create_pdata/nextflow_labels.config
+++ b/target/executable/eset/create_pdata/nextflow_labels.config
@@ -0,0 +1,105 @@
+executor {  // executor-level settings
+  $k8s {  // settings in this sub-block apply to the k8s executor only
+    submitRateLimit = '10sec'  // job submission rate limit (Nextflow documents '10sec' as 10 submissions per second — confirm)
+    pollInterval = '1 sec'  // interval between task status polls
+  }
+}
+
+process {
+  container = 'nextflow/bash:latest'  // default container for any process that does not set its own
+  
+  // Default resources; the memory request grows linearly with the retry attempt number
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36
+
+  // Retry when the exit status is in 137..140 (treated as memory-related); anything else uses 'terminate'
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3  // also read by get_memory() to detect the final attempt
+  maxMemory = 192.GB  // ceiling that get_memory() compares label memory requests against
+
+  // Resource labels: cpu labels set fixed counts; mem labels scale with task.attempt and go through get_memory()
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
+  
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
+
+}
+
+profiles {
+  // Resolve the temp dir: first non-empty of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, falling back to '/tmp'
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {  // profile: point the docker/podman/charliecloud temp setting at tempDir
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {  // profile: turn publishDir off for every process (name pattern '.*')
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {  // profile: enable Docker and explicitly disable the other container engines
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // Profile for local processing: lower maxMemory ceiling and smaller cpu/memory label values.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Clamp a memory request to `process.maxMemory`: returns `to_compare` unchanged when no cap is
+// configured or the request is within it, and the cap when the request exceeds it or on the last retry.
+def get_memory(to_compare) {
+  if (!process.containsKey("maxMemory") || !process.maxMemory) {
+    return to_compare
+  }
+  try {
+    // Final allowed attempt: ask for the full cap regardless of the computed request.
+    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+      return process.maxMemory
+    }
+    // Fix: this branch used to return `max_memory`, a name defined nowhere in this file,
+    // instead of the configured cap. `> 0` replaces `== 1` so any positive compareTo result counts.
+    def cap = process.maxMemory as nextflow.util.MemoryUnit
+    return to_compare.compareTo(cap) > 0 ? cap : to_compare
+  } catch (all) {
+    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+    System.exit(1)
+  }
+}
--- a/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml
+++ b/target/executable/integration_test_components/htrnaseq/check_eset/.config.vsh.yaml
@@ -0,0 +1,167 @@
+name: "check_eset"
+namespace: "integration_test_components/htrnaseq"
+version: "main"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--eset"
+    description: "Path to an ExpressionSet object."
+    info: null
+    example:
+    - "eset.rds"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--star_output"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+resources:
+- type: "r_script"
+  path: "script.R"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "This component tests the ExpressionSet object as output by the main pipeline."
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "bioconductor/bioconductor_docker:3.19"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "r"
+    cran:
+    - "bit64"
+    bioc:
+    - "Biobase"
+    bioc_force_install: false
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:  # provenance of this build: source config, runner/engine, output paths and source commit
+  config: "src/integration_test_components/htrnaseq/check_eset/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/integration_test_components/htrnaseq/check_eset"
+  executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://github.com/viash-hub/htrnaseq"  # credentials stripped: never commit a remote URL that embeds an access token
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/integration_test_components/htrnaseq/check_eset/check_eset
+++ b/target/executable/integration_test_components/htrnaseq/check_eset/check_eset
--- a/target/executable/integration_test_components/htrnaseq/check_eset/nextflow_labels.config
+++ b/target/executable/integration_test_components/htrnaseq/check_eset/nextflow_labels.config
@@ -0,0 +1,105 @@
+executor {
+  $k8s {  // settings scoped to the 'k8s' executor only
+    submitRateLimit = '10sec'  // throttles how fast tasks are submitted
+    pollInterval = '1 sec'  // how often task status is checked
+  }
+}
+
+process {
+  container = 'nextflow/bash:latest'  // default container image for processes
+  
+  // default resources, used unless one of the labels below overrides them
+  memory = { 8.Gb * task.attempt }  // closure: the request scales with the attempt number
+  cpus = 8
+  maxForks = 36
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = 192.GB  // cap read by get_memory() at the bottom of this file
+
+  // Resource labels: CPU tiers first, then memory tiers (memory requests pass through get_memory())
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
+  
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
+
+}
+
+profiles {
+  // detect tempdir: first set value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR; falls back to '/tmp'
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {  // point the temp setting of each container engine at tempDir
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {  // disable publishDir for every process (name pattern '.*')
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {  // enable Docker and explicitly disable the other container engines
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing: lower memory cap and reduced CPU/memory tiers.
+    process {
+        maxMemory = 25.GB  // replaces the 192.GB cap from the top-level process scope
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
+// Clamp a requested memory amount (to_compare) to process.maxMemory, when a cap is configured.
+// On the attempt whose number equals process.maxRetries the full cap is requested instead.
+def get_memory(to_compare) {
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare  // no cap configured: use the request unchanged
+    }
+    try {
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      } else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        // Request exceeds the cap: return the cap (this branch used to reference an undefined `max_memory`).
+        return process.maxMemory as nextflow.util.MemoryUnit
+      } else {
+        return to_compare
+      }
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+}
--- a/target/executable/parallel_map/.config.vsh.yaml
+++ b/target/executable/parallel_map/.config.vsh.yaml
@@ -110,6 +110,8 @@ resources:
 - type: "bash_script"
  path: "script.sh"
  is_executable: true
+- type: "file"
+  path: "STAR"
 - type: "file"
  path: "nextflow_labels.config"
  dest: "nextflow_labels.config"
@@ -205,26 +207,25 @@ engines:
  - type: "apt"
    packages:
    - "procps"
-    - "gzip"
-    - "bzip2"
-    - "parallel"
    - "wget"
+    - "automake"
+    - "make"
+    - "gcc"
+    - "g++"
    - "zlib1g-dev"
-    - "unzip"
-    - "xxd"
+    - "parallel"
    - "file"
    interactive: false
  - type: "docker"
-    run:
-    - "wget -O $STAR_TARGET $STAR_SOURCE && \\\n  unzip $STAR_TARGET -d /tmp && \\\
-      \n  mv /tmp/STAR_$STAR_VERSION/Linux_x86_64_static/STAR /usr/local/bin/$STAR_BINARY\
-      \ && \\\n  chmod +x /usr/local/bin/$STAR_BINARY && \\\n  rm $STAR_TARGET &&\
-      \ rm -rf /tmp/STAR_$STAR_VERSION\n"
+    copy:
+    - "STAR /usr/local/bin/$STAR_BINARY"
+    build_args:
+    - "STAR_V=2.7.6a"
    env:
-    - "STAR_VERSION \"2.7.11b\""
-    - "STAR_SOURCE \"https://github.com/alexdobin/STAR/releases/download/$STAR_VERSION/STAR_$STAR_VERSION.zip\""
-    - "STAR_TARGET \"/tmp/star.zip\""
-    - "STAR_BINARY \"STAR\""
+    - "STAR_SOURCE=\"https://github.com/alexdobin/STAR/archive/refs/tags/$STAR_V.tar.gz\""
+    - "STAR_TARGET=\"/app/star-$STAR_V.tar.gz\""
+    - "STAR_INSTALL_DIR=\"/app/STAR-$STAR_V\""
+    - "STAR_BINARY=STAR"
  entrypoint: []
  cmd: null
 - type: "native"
@@ -235,15 +236,18 @@ build_info:
  engine: "docker|native"
  output: "target/executable/parallel_map"
  executable: "target/executable/parallel_map/parallel_map"
-  viash_version: "0.9.0-RC7"
-  git_commit: "cf9797232db1306bfd5696287928cababe317d99"
-  git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
  version: "main"
  description: "High-throughput pipeline [WIP]\n"
-  info: null
-  viash_version: "0.9.0-RC7"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
--- a/target/executable/parallel_map/STAR
+++ b/target/executable/parallel_map/STAR
--- a/target/executable/parallel_map/nextflow_labels.config
+++ b/target/executable/parallel_map/nextflow_labels.config
@@ -1,26 +1,88 @@
+executor {
+  $k8s {
+    submitRateLimit = '10sec'
+    pollInterval = '1 sec'
+  }
+}
+
 process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
+  container = 'nextflow/bash:latest'
+  
+  // default resources
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
-  maxMemory = null
+  maxMemory = 192.GB

  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }

 }

+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
 def get_memory(to_compare) {
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
      return to_compare
--- a/target/executable/parallel_map/parallel_map
+++ b/target/executable/parallel_map/parallel_map
@@ -2,9 +2,9 @@

 # parallel_map main
 # 
-# This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
-# derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
-# Data Intuitive.
+# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
 # 
 # The component may contain files which fall under a different license. The
 # authors of this component should specify the license in the header of such
@@ -502,23 +502,19 @@ function ViashDockerfile {
 FROM debian:stable-slim
 ENTRYPOINT []
 RUN apt-get update && \
-  DEBIAN_FRONTEND=noninteractive apt-get install -y procps gzip bzip2 parallel wget zlib1g-dev unzip xxd file && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y procps wget automake make gcc g++ zlib1g-dev parallel file && \
  rm -rf /var/lib/apt/lists/*

-ENV STAR_VERSION "2.7.11b"
-ENV STAR_SOURCE "https://github.com/alexdobin/STAR/releases/download/$STAR_VERSION/STAR_$STAR_VERSION.zip"
-ENV STAR_TARGET "/tmp/star.zip"
-ENV STAR_BINARY "STAR"
-RUN wget -O $STAR_TARGET $STAR_SOURCE && \
-  unzip $STAR_TARGET -d /tmp && \
-  mv /tmp/STAR_$STAR_VERSION/Linux_x86_64_static/STAR /usr/local/bin/$STAR_BINARY && \
-  chmod +x /usr/local/bin/$STAR_BINARY && \
-  rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
-
+ARG STAR_V
+ENV STAR_SOURCE="https://github.com/alexdobin/STAR/archive/refs/tags/$STAR_V.tar.gz"
+ENV STAR_TARGET="/app/star-$STAR_V.tar.gz"
+ENV STAR_INSTALL_DIR="/app/STAR-$STAR_V"
+ENV STAR_BINARY=STAR
+COPY STAR /usr/local/bin/$STAR_BINARY
 LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
-LABEL org.opencontainers.image.created="2024-09-17T08:52:47Z"
+LABEL org.opencontainers.image.created="2024-11-05T15:39:41Z"
 LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
-LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
+LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
 LABEL org.opencontainers.image.version="main"

 VIASHDOCKER
@@ -532,7 +528,7 @@ function ViashDockerBuildArgs {
  local engine_id="$1"

  if [[ "$engine_id" == "docker" ]]; then
-    echo ""
+    echo "--build-arg 'STAR_V=2.7.6a'"
  fi
 }

--- a/target/executable/stats/combine_star_logs/.config.vsh.yaml
+++ b/target/executable/stats/combine_star_logs/.config.vsh.yaml
@@ -8,7 +8,7 @@ argument_groups:
    name: "--barcodes"
    description: "Barcodes responding to the respective log files.\n"
    info: null
-    required: false
+    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
@@ -20,7 +20,7 @@ argument_groups:
    - "Log.final.out"
    must_exist: true
    create_parent: true
-    required: false
+    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
@@ -34,7 +34,7 @@ argument_groups:
    - "Summary.txt"
    must_exist: true
    create_parent: true
-    required: false
+    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
@@ -44,7 +44,7 @@ argument_groups:
    info: null
    must_exist: true
    create_parent: true
-    required: false
+    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
@@ -182,15 +182,18 @@ build_info:
  engine: "docker|native"
  output: "target/executable/stats/combine_star_logs"
  executable: "target/executable/stats/combine_star_logs/combine_star_logs"
-  viash_version: "0.9.0-RC7"
-  git_commit: "cf9797232db1306bfd5696287928cababe317d99"
-  git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
  version: "main"
  description: "High-throughput pipeline [WIP]\n"
-  info: null
-  viash_version: "0.9.0-RC7"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
--- a/target/executable/stats/combine_star_logs/combine_star_logs
+++ b/target/executable/stats/combine_star_logs/combine_star_logs
@@ -2,9 +2,9 @@

 # combine_star_logs main
 # 
-# This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
-# derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
-# Data Intuitive.
+# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
 # 
 # The component may contain files which fall under a different license. The
 # authors of this component should specify the license in the header of such
@@ -175,16 +175,16 @@ function ViashHelp {
  echo ""
  echo "Arguments:"
  echo "    --barcodes"
-  echo "        type: string, multiple values allowed"
+  echo "        type: string, required parameter, multiple values allowed"
  echo "        Barcodes responding to the respective log files."
  echo ""
  echo "    --star_logs"
-  echo "        type: file, multiple values allowed, file must exist"
+  echo "        type: file, required parameter, multiple values allowed, file must exist"
  echo "        example: Log.final.out"
  echo "        Paths to the STAR log files (most frequently called Log.final.out)"
  echo ""
  echo "    --gene_summary_logs"
-  echo "        type: file, multiple values allowed, file must exist"
+  echo "        type: file, required parameter, multiple values allowed, file must exist"
  echo "        example: Summary.txt"
  echo "        Paths to the Summary.csv files from the STAR Solo output. Can be found"
  echo "        in"
@@ -192,7 +192,7 @@ function ViashHelp {
  echo "        directory."
  echo ""
  echo "    --reads_per_gene_logs"
-  echo "        type: file, multiple values allowed, file must exist"
+  echo "        type: file, required parameter, multiple values allowed, file must exist"
  echo "        Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
  echo ""
  echo "    --output"
@@ -486,9 +486,9 @@ RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "pandas"

 LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
-LABEL org.opencontainers.image.created="2024-09-17T08:52:49Z"
+LABEL org.opencontainers.image.created="2024-11-05T15:39:41Z"
 LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
-LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
+LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
 LABEL org.opencontainers.image.version="main"

 VIASHDOCKER
@@ -879,6 +879,22 @@ fi


 # check whether required parameters exist
+if [ -z ${VIASH_PAR_BARCODES+x} ]; then
+  ViashError '--barcodes' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_STAR_LOGS+x} ]; then
+  ViashError '--star_logs' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_GENE_SUMMARY_LOGS+x} ]; then
+  ViashError '--gene_summary_logs' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
+if [ -z ${VIASH_PAR_READS_PER_GENE_LOGS+x} ]; then
+  ViashError '--reads_per_gene_logs' is a required argument. Use "--help" to get more information on the parameters.
+  exit 1
+fi
 if [ -z ${VIASH_META_NAME+x} ]; then
  ViashError 'name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
@@ -1145,7 +1161,6 @@ trap interrupt INT SIGINT
 cat > "\$tempscript" << 'VIASHMAIN'
 import logging
 import pandas as pd
-import numpy as np
 from itertools import batched, starmap

 ### VIASH START
@@ -1192,7 +1207,7 @@ logger.setLevel(logging.DEBUG)
 def handle_percentages(column_value):
    # TODO: handle this more gracefully
    if column_value:
-        return np.float64(column_value.strip('%'))
+        return column_value.strip('%')
    return column_value

 def star_log_to_dataframe(barcode: str, log_path) -> pd.DataFrame:
@@ -1210,7 +1225,7 @@ def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
    logger.info("Reading summary log %s for barcode %s", summary_path, barcode)
    result = pd.read_table(summary_path, sep=",",
                           header=None, names=["Category", "Value"],
-                           index_col=0)
+                           index_col=0, dtype=pd.StringDtype())
    logger.info("Read %d row(s) and %d column(s) from summary file at %s",
                *result.shape, summary_path)
    return result
@@ -1219,9 +1234,14 @@ def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
 def reads_per_gene_to_dataframe(barcode, read_per_gene_path) -> pd.DataFrame:
    logger.info("Reading reads per gene file %s for barcode %s", read_per_gene_path, barcode)
    result = pd.read_table(read_per_gene_path, skiprows=[0, 1, 2, 3], header=None, sep="\\t",
+                           dtype={"geneID": pd.StringDtype(),
+                                  "Unstranded": pd.Int64Dtype(),
+                                  "posStrand": pd.Int64Dtype(),
+                                  "negStrand": pd.Int64Dtype()},
                           index_col=0, names=["geneID", "Unstranded", "posStrand", "negStrand"])
    result = result[["Unstranded"]] # Do not use .loc here because we need a DataFrame, not a Series
    df = pd.DataFrame({"Value": result.sum()})
+    df = df.rename({"Unstranded": "NumberOfCountedReads"}, errors="raise")
    df.index.name = "Category"
    logger.info("Read %d row(s) and %d column(s) from reads per gene file at %s",
                *df.shape, read_per_gene_path)
@@ -1250,12 +1270,16 @@ def star_log_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame
                "\\n\\t".join(to_keep[~to_keep].index.to_list()))
    result = df.loc[to_keep]

+    # Replace % by pect, remove columns, use camel case and remove spaces
+    # You might be tempted to use .title() to make everything uppercase,
+    # but characters which are already uppercase should stay that way.
+    # (example: NumberOfUMIs and not NumberOfUmis)
    result.index = result.index.str.replace("%", "pect")\\
                    .str.replace(":", "")\\
                    .str.replace(r"(?:^|\\s).", lambda m:m.group(0).upper(), regex=True)\\
                    .str.replace(" ", "")
    result = result.rename({"UniquelyMappedReadsNumber": "NumberOfMappedReads", 
-                            "UniquelyMappedReadsPect": "pctMappedReads"}, errors="raise")
+                            "UniquelyMappedReadsPect": "PctMappedReads"}, errors="raise")
    logger.info("Done filtering STAR logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\\n\\t%s", 
                barcode, *result.shape, "\\n\\t".join(result.index.to_list()))
@@ -1271,13 +1295,9 @@ def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame)
        "Reads Mapped to Genome: Unique",
        "Reads Mapped to Transcriptome: Unique Genes",
        "Reads in Cells Mapped to Unique Genes",
-        "Mean Reads per Cell",
        "Median UMI per Cell",
        "Median Genes per Cell",
-        "Q30 Bases in CB+UMI",
        "Reads Mapped to Genome: Unique+Multiple",
-        "Reads Mapped to Transcriptome: Unique+Multipe Genes",
-        "Fraction of Reads in Cells",
        "Median Reads per Cell",
        "Mean UMI per Cell",
        "Mean Genes per Cell",
@@ -1290,8 +1310,15 @@ def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame)
    result = df.loc[to_keep]
    result.index = result.index.str.replace(r"(?:^|\\s).", lambda m:m.group(0).upper(),
                                            regex=True).str.replace(" ", "")
-    result = result.rename({"UMIsInCells": "NumberOfUMIs", 
-                            "TotalGenesDetected": "NumberOfGenes"}, errors="raise")
+    to_rename = {"UMIsInCells": "NumberOfUMIs", 
+                 "TotalGenesDetected": "NumberOfGenes"}
+    try:
+        result = result.rename(to_rename, errors="raise")
+    except KeyError as e:
+        raise KeyError(f"Tried to rename log entries ({','.join(to_rename)}) in the summary "
+                       f"log for barcode {barcode}, but an entry was not found in the file. "
+                       "Make sure that you are using the correct version of STAR."
+                       f"Available entries: {", ".join(result.index.to_list())}") from e
    logger.info("Done filtering summary logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\\n\\t%s",
                barcode, *result.shape, "\\n\\t".join(result.index.to_list()))
@@ -1340,13 +1367,40 @@ def main(par):
    all_stats = pd.concat(all_logs_data, axis=1)
    logger.info("Log statistics were gathered for the following barcodes: %s", 
                ", ".join(all_stats.index.to_list()))
+    dtypes = {
+        'NumberOfInputReads': pd.UInt64Dtype(),
+        'NumberOfMappedReads': pd.UInt64Dtype(),
+        'PctMappedReads': pd.Float64Dtype(),
+        'NumberOfReadsMappedToMultipleLoci': pd.UInt64Dtype(),
+        'PectOfReadsMappedToMultipleLoci':  pd.Float64Dtype(), 
+        'NumberOfReadsMappedToTooManyLoci': pd.UInt64Dtype(),
+        'PectOfReadsMappedToTooManyLoci':  pd.Float64Dtype(),
+        'NumberOfReadsUnmappedTooManyMismatches': pd.UInt64Dtype(),
+        'PectOfReadsUnmappedTooManyMismatches':  pd.Float64Dtype(),
+        'NumberOfReadsUnmappedTooShort': pd.UInt64Dtype(), 
+        'PectOfReadsUnmappedTooShort':  pd.Float64Dtype(),
+        'NumberOfReadsUnmappedOther': pd.UInt64Dtype(),
+        'PectOfReadsUnmappedOther': pd.Float64Dtype(),
+        'ReadsWithValidBarcodes': pd.Float64Dtype(),
+        'SequencingSaturation': pd.Float64Dtype(),
+        'Q30BasesInCB+UMI': pd.Float64Dtype(),
+        'ReadsMappedToTranscriptome:Unique+MultipeGenes': pd.Float64Dtype(),
+        'EstimatedNumberOfCells': pd.UInt64Dtype(),
+        'FractionOfReadsInCells': pd.Float64Dtype(),
+        'MeanReadsPerCell': pd.UInt64Dtype(),
+        'NumberOfUMIs': pd.UInt64Dtype(),
+        'NumberOfGenes': pd.UInt64Dtype(),
+        'NumberOfCountedReads': pd.UInt64Dtype(),
+    }
+    all_stats = all_stats.astype(dtypes) 
    # batched() is used here to print a limited amount of columnns at a time
    # to make sure that they are all displayed (pandas might limit the view for readability)
    logger.info("Summary of final output:\\n%s\\n",
                "\\n".join(repr(all_stats.loc[:,columns].describe())
                          for columns in batched(all_stats.columns, 3))) 
    logger.info("Writing output to %s", par["output"])
-    all_stats.reset_index("WellBC").to_csv(par["output"], sep="\\t", header=True, index=False)
+    all_stats.reset_index("WellBC").to_csv(par["output"], sep="\\t", header=True,
+                                           index=False, float_format='%g')
    logger.info("Finished %s.", meta["name"])

 if __name__ == "__main__":
--- a/target/executable/stats/combine_star_logs/nextflow_labels.config
+++ b/target/executable/stats/combine_star_logs/nextflow_labels.config
@@ -1,26 +1,88 @@
+executor {
+  $k8s {
+    submitRateLimit = '10sec'
+    pollInterval = '1 sec'
+  }
+}
+
 process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
+  container = 'nextflow/bash:latest'
+  
+  // default resources
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
-  maxMemory = null
+  maxMemory = 192.GB

  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }

 }

+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
 def get_memory(to_compare) {
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
      return to_compare
--- a/target/executable/stats/generate_pool_statistics/.config.vsh.yaml
+++ b/target/executable/stats/generate_pool_statistics/.config.vsh.yaml
@@ -124,7 +124,7 @@ runners:
 engines:
 - type: "docker"
  id: "docker"
-  image: "python:3.11-slim"
+  image: "python:3.12-slim"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
@@ -154,15 +154,18 @@ build_info:
  engine: "docker|native"
  output: "target/executable/stats/generate_pool_statistics"
  executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
-  viash_version: "0.9.0-RC7"
-  git_commit: "cf9797232db1306bfd5696287928cababe317d99"
-  git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
  version: "main"
  description: "High-throughput pipeline [WIP]\n"
-  info: null
-  viash_version: "0.9.0-RC7"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
--- a/target/executable/stats/generate_pool_statistics/generate_pool_statistics
+++ b/target/executable/stats/generate_pool_statistics/generate_pool_statistics
@@ -2,9 +2,9 @@

 # generate_pool_statistics main
 # 
-# This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
-# derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
-# Data Intuitive.
+# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
 # 
 # The component may contain files which fall under a different license. The
 # authors of this component should specify the license in the header of such
@@ -468,7 +468,7 @@ function ViashDockerfile {

  if [[ "$engine_id" == "docker" ]]; then
    cat << 'VIASHDOCKER'
-FROM python:3.11-slim
+FROM python:3.12-slim
 ENTRYPOINT []
 RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
@@ -478,9 +478,9 @@ RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "pandas"

 LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
-LABEL org.opencontainers.image.created="2024-09-17T08:52:49Z"
+LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
 LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
-LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
+LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
 LABEL org.opencontainers.image.version="main"

 VIASHDOCKER
@@ -1080,9 +1080,13 @@ if __name__ == "__main__":
    nr_reads_nr_genes_wells = []
    for nr_reads_nr_genes_file in par["nrReadsNrGenesPerChrom"]:
        nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file, 
-                                                   header=0, delimiter="\\t"))
-    nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True)
-    total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr", 
+                                                   header=0, delimiter="\\t",
+                                                   dtype={"WellBC":	pd.StringDtype(),
+                                                          "Chr": pd.StringDtype(),
+                                                          "NumberOfReads": pd.UInt64Dtype(),
+                                                          "NumberOfGenes": pd.UInt64Dtype()}))
+    nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True,)
+    total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
                                                                       values=["NumberOfReads"], fill_value=0,
                                                                       aggfunc="sum").droplevel(0, axis=1)
    total_nr_reads_per_chromosome.columns.name = None
@@ -1123,19 +1127,25 @@ if __name__ == "__main__":
    total_chromosomal_reads = total_nr_reads_per_chromosome.loc[:,matching_chromosomes].sum(axis=1)
    percentage_chromosomal_reads = round(total_chromosomal_reads / total_sum_of_reads * 100, 2)

+    cols_to_add = {
+        "pctChrom": percentage_chromosomal_reads,
+        "pctMT": percentage_mitochondrial_reads,
+        "pctERCC": percentage_ercc_reads,
+        "SumReads": total_sum_of_reads,
+        "NumberOfGenes": total_nr_genes,
+        "NumberOfERCCReads": total_ercc_reads,
+        "NumberOfChromReads": total_chromosomal_reads,
+        "NumberOfMTReads": mitochondrial_reads,
+    }
    total_nr_reads_per_chromosome = total_nr_reads_per_chromosome.assign(
-        pctChrom=percentage_chromosomal_reads,
-        pctMT=percentage_mitochondrial_reads,
-        pctERCC=percentage_ercc_reads,
-        SumReads=total_sum_of_reads,
-        NumberOfGenes=total_nr_genes,
+       **cols_to_add
    )

    total_nr_reads_per_chromosome.reset_index(names="WellBC")\\
        .to_csv(par["nrReadsNrGenesPerChromPool"], sep="\\t",
-                header=True, index=False, 
-                columns=("WellBC",) + tuple(chromosome_names) + \\
-                        ("SumReads", "pctMT", "pctERCC", "pctChrom", "NumberOfGenes"))
+                header=True, index=False, float_format="%g",
+                columns=("WellBC",) + tuple(chromosome_names) + tuple(cols_to_add.keys())
+               )
 VIASHMAIN
 python -B "\$tempscript" &
 wait "\$!"
--- a/target/executable/stats/generate_pool_statistics/nextflow_labels.config
+++ b/target/executable/stats/generate_pool_statistics/nextflow_labels.config
@@ -1,26 +1,88 @@
+executor {
+  $k8s {
+    submitRateLimit = '10sec'
+    pollInterval = '1 sec'
+  }
+}
+
 process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
+  container = 'nextflow/bash:latest'
+  
+  // default resources
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
-  maxMemory = null
+  maxMemory = 192.GB

  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }

 }

+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
 def get_memory(to_compare) {
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
      return to_compare
--- a/target/executable/stats/generate_well_statistics/.config.vsh.yaml
+++ b/target/executable/stats/generate_well_statistics/.config.vsh.yaml
@@ -225,15 +225,18 @@ build_info:
  engine: "docker|native"
  output: "target/executable/stats/generate_well_statistics"
  executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
-  viash_version: "0.9.0-RC7"
-  git_commit: "cf9797232db1306bfd5696287928cababe317d99"
-  git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
+  viash_version: "0.9.0"
+  git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
+  git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
  version: "main"
  description: "High-throughput pipeline [WIP]\n"
-  info: null
-  viash_version: "0.9.0-RC7"
+  info:
+    test_resources:
+    - path: "gs://viash-hub-test-data/htrnaseq/v1/"
+      dest: "resources_test"
+  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
--- a/target/executable/stats/generate_well_statistics/generate_well_statistics
+++ b/target/executable/stats/generate_well_statistics/generate_well_statistics
@@ -2,9 +2,9 @@

 # generate_well_statistics main
 # 
-# This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
-# derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
-# Data Intuitive.
+# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
+# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
+# Intuitive.
 # 
 # The component may contain files which fall under a different license. The
 # authors of this component should specify the license in the header of such
@@ -511,9 +511,9 @@ RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "pysam" "pandas"

 LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
-LABEL org.opencontainers.image.created="2024-09-17T08:52:48Z"
+LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
 LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
-LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
+LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
 LABEL org.opencontainers.image.version="main"

 VIASHDOCKER
--- a/target/executable/stats/generate_well_statistics/nextflow_labels.config
+++ b/target/executable/stats/generate_well_statistics/nextflow_labels.config
@@ -1,26 +1,88 @@
+executor {
+  $k8s {
+    submitRateLimit = '10sec'
+    pollInterval = '1 sec'
+  }
+}
+
 process {
-  // Default resources for components that hardly do any processing
-  memory = { 2.GB * task.attempt }
-  cpus = 1
+  container = 'nextflow/bash:latest'
+  
+  // default resources
+  memory = { 8.Gb * task.attempt }
+  cpus = 8
+  maxForks = 36

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
-  maxMemory = null
+  maxMemory = 192.GB

  // Resource labels
-  withLabel: singlecpu { cpus = 1 }
-  withLabel: lowcpu { cpus = 4 }
-  withLabel: midcpu { cpus = 10 }
-  withLabel: highcpu { cpus = 20 }
+  withLabel: verylowcpu { cpus = 2 }
+  withLabel: lowcpu { cpus = 8 }
+  withLabel: midcpu { cpus = 16 }
+  withLabel: highcpu { cpus = 32 }
  
-  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
-  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
-  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
-  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+  withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }

 }

+profiles {
+  // detect tempdir
+  tempDir = java.nio.file.Paths.get(
+    System.getenv('NXF_TEMP') ?:
+      System.getenv('VIASH_TEMP') ?: 
+      System.getenv('TEMPDIR') ?: 
+      System.getenv('TMPDIR') ?: 
+      '/tmp'
+  ).toAbsolutePath()
+
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+
+  docker {
+    docker.fixOwnership    = true
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+
+  local {
+    // This config is for local processing.
+    process {
+        maxMemory = 25.GB
+        withLabel: verylowcpu { cpus = 2 }
+        withLabel: lowcpu { cpus = 4 }
+        withLabel: midcpu { cpus = 6 }
+        withLabel: highcpu { cpus = 12 }
+  
+        withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
+        withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
+        withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
+    }
+  }
+}
+
 def get_memory(to_compare) {
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
      return to_compare