Build branch main with version main (21831c2)

Build pipeline: viash-hub.htrnaseq.main-n8w4c Source commit: 21831c2104 Source message: Fix well_demultiplex test
2024-08-29 08:10:20 +00:00
commit 044a3af7a9
59 changed files with 33214 additions and 0 deletions
--- a/target/nextflow/workflows/parallel_map_wf/.config.vsh.yaml
+++ b/target/nextflow/workflows/parallel_map_wf/.config.vsh.yaml
@@ -0,0 +1,195 @@
+name: "parallel_map_wf"
+namespace: "workflows"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input_r1"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--input_r2"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--barcode"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--pool"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--genomeDir"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--output"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+resources:
+- type: "nextflow_script"
+  path: "main.nf"
+  is_executable: true
+  entrypoint: "run_wf"
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Map RNA sequencing data, provided as fastq files (paired-end) to a reference\
+  \ genome using STAR Solo.\nInput data must have been demultiplexed beforehand, meaning\
+  \ that a single fastq pair provides data for\none barcode (one well). Multiple wells\
+  \ can be mapped in parallel by providing multiple events to the \nworkflow. Output\
+  \ is provided as mapped output per pool, i.e. one output is provided per pool.\n"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+dependencies:
+- name: "parallel_map"
+  repository:
+    type: "local"
+- name: "workflows/utils/groupWells"
+  repository:
+    type: "local"
+repositories:
+- type: "local"
+  name: "local"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+# Execution engines available to this component.
+# NOTE(review): the two entries below are identical (same type and same id
+# "native"). The second one presumably comes from the package-level config_mod
+# `.engines += { type: "native" }` being applied to a config that already
+# declares a native engine -- confirm whether the duplicate id is intended.
+engines:
+- type: "native"
+  id: "native"
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/workflows/parallel_map_wf/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "native|native"
+  output: "target/nextflow/workflows/parallel_map_wf"
+  executable: "target/nextflow/workflows/parallel_map_wf/main.nf"
+  viash_version: "0.9.0-RC7"
+  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_remote: "https://github.com/viash-hub/htrnaseq"
+  dependencies:
+  - "target/nextflow/parallel_map"
+  - "target/nextflow/workflows/utils/groupWells"
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/nextflow/workflows/parallel_map_wf/main.nf
+++ b/target/nextflow/workflows/parallel_map_wf/main.nf
--- a/target/nextflow/workflows/parallel_map_wf/nextflow.config
+++ b/target/nextflow/workflows/parallel_map_wf/nextflow.config
@@ -0,0 +1,125 @@
+manifest {
+  name = 'workflows/parallel_map_wf'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = 'main'
+  description = 'Map RNA sequencing data, provided as fastq files (paired-end) to a reference genome using STAR Solo.\nInput data must have been demultiplexed beforehand, meaning that a single fastq pair provides data for\none barcode (one well). Multiple wells can be mapped in parallel by providing multiple events to the \nworkflow. Output is provided as mapped output per pool, i.e. one output is provided per pool.\n'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// Detect the temporary directory.
+// Takes the first environment variable that is set to a non-empty value, in
+// this order: NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR. Falls back to '/tmp' when
+// none of them is set. The result is stored as an absolute path and is read by
+// the `mount_temp` profile.
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+// Selectable configuration profiles.
+profiles {
+  // Turns off publishDir for every process (the selector '.*' matches all names).
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  // Points the temp setting of docker, podman and charliecloud at the detected tempDir.
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  // Each profile below enables exactly one container engine and explicitly
+  // disables the other four.
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  // Singularity additionally switches on autoMounts.
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+// Resource labels: one selector per label, each setting a single directive.
+//   mem<N>gb / mem<N>tb   - memory as a decimal (power-of-10) byte count
+//   mem<N>gib / mem<N>tib - memory as a binary (power-of-2) byte count
+//   cpu<N>                - number of cpus
+// The same label-to-value table appears under `runners[].config.labels` in
+// this component's .config.vsh.yaml.
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/nextflow/workflows/parallel_map_wf/nextflow_labels.config
+++ b/target/nextflow/workflows/parallel_map_wf/nextflow_labels.config
@@ -0,0 +1,43 @@
+// Process-wide defaults, the retry policy and the project's own resource labels.
+process {
+  // Default resources for components that hardly do any processing.
+  // The memory request is multiplied by the attempt number, so it grows on each retry.
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  // (exit status 137 through 140); any other failure terminates the run.
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  // Upper bound read by get_memory(); while it is null, get_memory() returns
+  // the requested amount unchanged.
+  maxMemory = null
+
+  // Resource labels
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  // Memory labels also scale with the attempt number; each request is passed
+  // through get_memory(), which is defined further down in this file.
+  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+}
+
+/**
+ * Limit a requested amount of memory to `process.maxMemory`.
+ *
+ * - When `process.maxMemory` is absent or unset, the request is returned as-is.
+ * - On the attempt whose number equals `process.maxRetries`, `process.maxMemory`
+ *   itself is returned.
+ * - Otherwise the request is returned, clamped to `process.maxMemory` when it
+ *   is larger.
+ *
+ * @param to_compare the requested memory; must support `compareTo` against a
+ *                   `nextflow.util.MemoryUnit`
+ * @return the memory amount to use
+ *
+ * If anything in the capped path throws, a message is printed and the JVM
+ * exits with status 1.
+ */
+def get_memory(to_compare) {
+    // No cap configured: hand the request back untouched.
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare
+    }
+
+    try {
+      // NOTE(review): `task` is not a parameter of this function -- confirm
+      // that it resolves here when a cap is configured; if it does not, the
+      // catch block below turns that into an exit.
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      }
+      // Test the sign of compareTo rather than the literal value 1.
+      else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        // The request exceeds the cap: clamp it. This branch used to return
+        // an undefined variable `max_memory`, so every over-cap request ended
+        // in the error exit below instead of being clamped.
+        return process.maxMemory as nextflow.util.MemoryUnit
+      }
+      else {
+        return to_compare
+      }
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+}
--- a/target/nextflow/workflows/parallel_map_wf/nextflow_schema.json
+++ b/target/nextflow/workflows/parallel_map_wf/nextflow_schema.json
@@ -0,0 +1,121 @@
+{
+"$schema": "http://json-schema.org/draft-07/schema",
+"title": "parallel_map_wf",
+"description": "Map RNA sequencing data, provided as fastq files (paired-end) to a reference genome using STAR Solo.\nInput data must have been demultiplexed beforehand, meaning that a single fastq pair provides data for\none barcode (one well). Multiple wells can be mapped in parallel by providing multiple events to the \nworkflow. Output is provided as mapped output per pool, i.e. one output is provided per pool.\n",
+"type": "object",
+"definitions": {
+
+    
+    
+    "arguments" : {
+    "title": "Arguments",
+    "type": "object",
+    "description": "No description",
+    "properties": {
+    
+        
+                "input_r1": {
+                "type":
+                "string",
+                "description": "Type: `file`, required. ",
+                "help_text": "Type: `file`, required. "
+            
+            }
+    
+
+        ,
+                "input_r2": {
+                "type":
+                "string",
+                "description": "Type: `file`, required. ",
+                "help_text": "Type: `file`, required. "
+            
+            }
+    
+
+        ,
+                "barcode": {
+                "type":
+                "string",
+                "description": "Type: `string`, required. ",
+                "help_text": "Type: `string`, required. "
+            
+            }
+    
+
+        ,
+                "pool": {
+                "type":
+                "string",
+                "description": "Type: `string`, required. ",
+                "help_text": "Type: `string`, required. "
+            
+            }
+    
+
+        ,
+                "genomeDir": {
+                "type":
+                "string",
+                "description": "Type: `file`, required. ",
+                "help_text": "Type: `file`, required. "
+            
+            }
+    
+
+        ,
+                "output": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. ",
+                "help_text": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. "
+            ,
+                "default": "$id.$key.output_*.output_*"
+            }
+    
+
+}
+},
+    
+    
+    "nextflow input-output arguments" : {
+    "title": "Nextflow input-output arguments",
+    "type": "object",
+    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+    "properties": {
+    
+        
+                "publish_dir": {
+                "type":
+                "string",
+                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
+                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
+            
+            }
+    
+
+        ,
+                "param_list": {
+                "type":
+                "string",
+                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
+                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
+                "hidden": true
+            
+            }
+    
+
+}
+}
+},
+"allOf": [
+
+    {
+    "$ref": "#/definitions/arguments"
+    },
+
+    {
+    "$ref": "#/definitions/nextflow input-output arguments"
+    }
+]
+}