Build branch v0.3 with version v0.3.0 (bc7a95f)
Build pipeline: viash-hub.htrnaseq.v0.3-bxj5m
Source commit: bc7a95f98f
Source message: Bump version to v0.3.0 (2)
This commit is contained in:
232
target/executable/eset/create_eset/.config.vsh.yaml
Normal file
232
target/executable/eset/create_eset/.config.vsh.yaml
Normal file
@@ -0,0 +1,232 @@
|
||||
name: "create_eset"
|
||||
namespace: "eset"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--pDataFile"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--fDataFile"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--mappingDir"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--poolName"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
info: null
|
||||
default:
|
||||
- "eset.$id.rds"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "r_script"
|
||||
path: "script.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
test_resources:
|
||||
- type: "r_script"
|
||||
path: "test.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "pData.tsv"
|
||||
- type: "file"
|
||||
path: "fData.tsv"
|
||||
- type: "file"
|
||||
path: "mapping_dir"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "rocker/r2u:24.04"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "r"
|
||||
cran:
|
||||
- "data.table"
|
||||
- "nlcv"
|
||||
bioc:
|
||||
- "Seurat"
|
||||
bioc_force_install: false
|
||||
test_setup:
|
||||
- type: "r"
|
||||
cran:
|
||||
- "testthat"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/eset/create_eset/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_eset"
|
||||
executable: "target/executable/eset/create_eset/create_eset"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1621
target/executable/eset/create_eset/create_eset
Executable file
1621
target/executable/eset/create_eset/create_eset
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/eset/create_eset/nextflow_labels.config
Normal file
108
target/executable/eset/create_eset/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
211
target/executable/eset/create_fdata/.config.vsh.yaml
Normal file
211
target/executable/eset/create_fdata/.config.vsh.yaml
Normal file
@@ -0,0 +1,211 @@
|
||||
name: "create_fdata"
|
||||
namespace: "eset"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--gtf"
|
||||
description: "Genome annotation file in GTF format."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Tab-delimited text file containing information about the 'gene'\
|
||||
\ or 'transcript'\nentries from the input GTF file. The 'transcript' entries\
|
||||
\ are used in case the source\nof the GTF was 'refGene' or 'ncbiRefSeq'. \n"
|
||||
info: null
|
||||
default:
|
||||
- "fData.$id.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "create_fdata.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Create a fdata file\n"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test_annotation.gtf"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/eset/create_fdata/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_fdata"
|
||||
executable: "target/executable/eset/create_fdata/create_fdata"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1212
target/executable/eset/create_fdata/create_fdata
Executable file
1212
target/executable/eset/create_fdata/create_fdata
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/eset/create_fdata/nextflow_labels.config
Normal file
108
target/executable/eset/create_fdata/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
225
target/executable/eset/create_pdata/.config.vsh.yaml
Normal file
225
target/executable/eset/create_pdata/.config.vsh.yaml
Normal file
@@ -0,0 +1,225 @@
|
||||
name: "create_pdata"
|
||||
namespace: "eset"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--star_stats_file"
|
||||
description: "Tab-delimited text file containing statistics (per column) that\
|
||||
\ were generated\nfrom the STAR log files (Log.final.out, Summary.csv, ReadsPerGene.out.tab).\n\
|
||||
Each entry (row) in the file describes the values for one well (barcode).\n"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChromPool"
|
||||
description: "Pivot table in tsv format of the combined nrReadsNrGenesPerChrom\
|
||||
\ files from STAR. \nDescribes per chromosome (as columns) the number of reads,\
|
||||
\ as well as the total number \nof reads per cell barcode and the percentage\
|
||||
\ of nuclear, ERCC and mitochondrial\nreads.\n"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
info: null
|
||||
default:
|
||||
- "pData.$id.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "create_pdata.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Create a pdata file by combining the mapping statistics \n"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nrReadsNrGenesPerChromPool.txt"
|
||||
- type: "file"
|
||||
path: "starLogs.txt"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/eset/create_pdata/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_pdata"
|
||||
executable: "target/executable/eset/create_pdata/create_pdata"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1178
target/executable/eset/create_pdata/create_pdata
Executable file
1178
target/executable/eset/create_pdata/create_pdata
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/eset/create_pdata/nextflow_labels.config
Normal file
108
target/executable/eset/create_pdata/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,182 @@
|
||||
name: "check_eset"
|
||||
namespace: "integration_test_components/htrnaseq"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--eset"
|
||||
description: "Path to an ExpressionSet object."
|
||||
info: null
|
||||
example:
|
||||
- "eset.rds"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--star_output"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "r_script"
|
||||
path: "script.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "This component test the ExpressionSet object as output by the main pipeline."
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "bioconductor/bioconductor_docker:3.19"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "r"
|
||||
cran:
|
||||
- "bit64"
|
||||
bioc:
|
||||
- "Biobase"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integration_test_components/htrnaseq/check_eset/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integration_test_components/htrnaseq/check_eset"
|
||||
executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1312
target/executable/integration_test_components/htrnaseq/check_eset/check_eset
Executable file
1312
target/executable/integration_test_components/htrnaseq/check_eset/check_eset
Executable file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
name: "check_cutadapt_output"
|
||||
namespace: "integration_test_components/well_demultiplexing"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--fastq_r1"
|
||||
description: "Path to the forward reads to test."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--fastq_r2"
|
||||
description: "Path to the reverse reads to test."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--ids"
|
||||
description: "Well IDs for the corresponding fastq input"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "This component test the cutadapt output from the well_demultiplex subworkflow."
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "dnaio"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integration_test_components/well_demultiplexing/check_cutatapt_output/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output"
|
||||
executable: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
177
target/executable/io/publish_fastqs/.config.vsh.yaml
Normal file
177
target/executable/io/publish_fastqs/.config.vsh.yaml
Normal file
@@ -0,0 +1,177 @@
|
||||
name: "publish_fastqs"
|
||||
namespace: "io"
|
||||
version: "v0.3.0"
|
||||
argument_groups:
|
||||
- name: "Input arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
description: "Directory to write R1 fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
description: "Directory to write R2 fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- name: "Output arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
info: null
|
||||
default:
|
||||
- "$id"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "bash_script"
|
||||
path: "code.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Publish the fastq files per well"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/io/publish_fastqs/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/publish_fastqs"
|
||||
executable: "target/executable/io/publish_fastqs/publish_fastqs"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
108
target/executable/io/publish_fastqs/nextflow_labels.config
Normal file
108
target/executable/io/publish_fastqs/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
1187
target/executable/io/publish_fastqs/publish_fastqs
Executable file
1187
target/executable/io/publish_fastqs/publish_fastqs
Executable file
File diff suppressed because it is too large
Load Diff
221
target/executable/io/publish_results/.config.vsh.yaml
Normal file
221
target/executable/io/publish_results/.config.vsh.yaml
Normal file
@@ -0,0 +1,221 @@
|
||||
name: "publish_results"
|
||||
namespace: "io"
|
||||
version: "v0.3.0"
|
||||
argument_groups:
|
||||
- name: "Input arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--star_output"
|
||||
description: "Output from mapping with STAR"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--star_qc_metrics"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--eset"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--f_data"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--p_data"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--html_report"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
info: null
|
||||
default:
|
||||
- "$id"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "bash_script"
|
||||
path: "code.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Publish the results"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/io/publish_results/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/publish_results"
|
||||
executable: "target/executable/io/publish_results/publish_results"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
108
target/executable/io/publish_results/nextflow_labels.config
Normal file
108
target/executable/io/publish_results/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
1482
target/executable/io/publish_results/publish_results
Executable file
1482
target/executable/io/publish_results/publish_results
Executable file
File diff suppressed because it is too large
Load Diff
290
target/executable/parallel_map/.config.vsh.yaml
Normal file
290
target/executable/parallel_map/.config.vsh.yaml
Normal file
@@ -0,0 +1,290 @@
|
||||
name: "parallel_map"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Toni Verbeiren"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
github: "tverbeiren"
|
||||
linkedin: "verbeiren"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist and CEO"
|
||||
argument_groups:
|
||||
- name: "Input arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--genomeDir"
|
||||
description: "STAR reference directory"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--barcodes"
|
||||
description: "The barcodes/wells to process"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- name: "Barcode arguments"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--umiLength"
|
||||
description: "The length of the UMIs"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--limitBAMsortRAM"
|
||||
info: null
|
||||
default:
|
||||
- "10000000000"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Runtime arguments"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--runThreadN"
|
||||
description: "Number of threads to use for a single STAR execution."
|
||||
info: null
|
||||
default:
|
||||
- 1
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Location of the output folders, 1 folder per barcode. The value\
|
||||
\ used\nfor this argument must contain a '*', which will be replaced with the\n\
|
||||
barcode to form the final output location for that barcode.\n"
|
||||
info: null
|
||||
default:
|
||||
- "./*"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--joblog"
|
||||
description: "Where to store the log file listing all the jobs."
|
||||
info: null
|
||||
default:
|
||||
- "execution_log.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "bash_script"
|
||||
path: "script.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "STAR"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
|
||||
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
|
||||
test_resources:
|
||||
- type: "bash_script"
|
||||
path: "test.sh"
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
- "wget"
|
||||
- "automake"
|
||||
- "make"
|
||||
- "gcc"
|
||||
- "g++"
|
||||
- "zlib1g-dev"
|
||||
- "parallel"
|
||||
- "file"
|
||||
interactive: false
|
||||
- type: "docker"
|
||||
copy:
|
||||
- "STAR /usr/local/bin/$STAR_BINARY"
|
||||
build_args:
|
||||
- "STAR_V=2.7.6a"
|
||||
env:
|
||||
- "STAR_SOURCE=\"https://github.com/alexdobin/STAR/archive/refs/tags/$STAR_V.tar.gz\""
|
||||
- "STAR_TARGET=\"/app/star-$STAR_V.tar.gz\""
|
||||
- "STAR_INSTALL_DIR=\"/app/STAR-$STAR_V\""
|
||||
- "STAR_BINARY=STAR"
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/parallel_map/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/parallel_map"
|
||||
executable: "target/executable/parallel_map/parallel_map"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
BIN
target/executable/parallel_map/STAR
Executable file
BIN
target/executable/parallel_map/STAR
Executable file
Binary file not shown.
108
target/executable/parallel_map/nextflow_labels.config
Normal file
108
target/executable/parallel_map/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
1636
target/executable/parallel_map/parallel_map
Executable file
1636
target/executable/parallel_map/parallel_map
Executable file
File diff suppressed because it is too large
Load Diff
235
target/executable/report/create_report/.config.vsh.yaml
Normal file
235
target/executable/report/create_report/.config.vsh.yaml
Normal file
@@ -0,0 +1,235 @@
|
||||
name: "create_report"
|
||||
namespace: "report"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--eset"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_report"
|
||||
info: null
|
||||
example:
|
||||
- "report.html"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "r_script"
|
||||
path: "script.R"
|
||||
is_executable: true
|
||||
- type: "r_script"
|
||||
path: "template.Rmd"
|
||||
is_executable: true
|
||||
- type: "r_script"
|
||||
path: "plateLayouts.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "OutputSTARsolo.png"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Create a basic QC report in HTML format based on a number of esets.\n"
|
||||
test_resources:
|
||||
- type: "r_script"
|
||||
path: "test.R"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "rocker/r2u:24.04"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
- "pandoc"
|
||||
interactive: false
|
||||
- type: "r"
|
||||
cran:
|
||||
- "ggplot2"
|
||||
- "knitr"
|
||||
- "gridExtra"
|
||||
- "RColorBrewer"
|
||||
- "processx"
|
||||
- "whisker"
|
||||
- "rmarkdown"
|
||||
- "bookdown"
|
||||
- "data.table"
|
||||
- "platetools"
|
||||
- "htmltools"
|
||||
- "DT"
|
||||
- "logger"
|
||||
- "bit64"
|
||||
bioc:
|
||||
- "Biobase"
|
||||
- "ComplexHeatmap"
|
||||
script:
|
||||
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
|
||||
, getOption(\"repos\")))"
|
||||
bioc_force_install: false
|
||||
test_setup:
|
||||
- type: "r"
|
||||
packages:
|
||||
- "testthat"
|
||||
- "R.utils"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/report/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/report/create_report"
|
||||
executable: "target/executable/report/create_report/create_report"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
BIN
target/executable/report/create_report/OutputSTARsolo.png
Normal file
BIN
target/executable/report/create_report/OutputSTARsolo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 77 KiB |
1162
target/executable/report/create_report/create_report
Executable file
1162
target/executable/report/create_report/create_report
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/report/create_report/nextflow_labels.config
Normal file
108
target/executable/report/create_report/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
430
target/executable/report/create_report/plateLayouts.R
Executable file
430
target/executable/report/create_report/plateLayouts.R
Executable file
@@ -0,0 +1,430 @@
|
||||
|
||||
#' Displays the annotation of the wells in a plateLayout
|
||||
#' @param plateData a data.table object containing the information
|
||||
#' of the plate. This must contain a "WellID".
|
||||
#' @param plateName The plate name
|
||||
#' @param valueVariable The name of the variable in 'plateData' to
|
||||
#' be visualized in a plate layout.
|
||||
#' @param textVariable The name of the variable in 'plateData' to be
|
||||
#' shown in the wells of the plate layout. If NULL, the valueVariable
|
||||
#' is shown.
|
||||
#' @param colours A named character vector containing the colours
|
||||
#' for the different levels of the valuevariable. The names should
|
||||
#' correspond to the dose levels. if not specified, a scheme of blues
|
||||
#' will be provided.
|
||||
#' @param breaks Numeric vector indicating breaks for plot coloring.
|
||||
#' @param colourWellText Colour to display the text in the wells.
|
||||
#' @param layout Integer vector of length two with number of rows and
|
||||
#' colums in a plate, e.g. \code{c(16,24)}
|
||||
#' @param legend.title A title for the legend
|
||||
#' @param plot.title A title for the plot, will be contracted
|
||||
#' with the plate name
|
||||
#' @param ... additional arguments for \code{plateLayout.default} function
|
||||
#' @import data.table
|
||||
#' @importFrom platetools fill_plate
|
||||
#' @export
|
||||
plateLayout.annotation <- function(
|
||||
plateData,
|
||||
plateName = character(),
|
||||
valueVariable = "Dose",
|
||||
textVariable = NULL,
|
||||
breaks = NULL, colours = NULL,
|
||||
colourWellText = "black",
|
||||
layout = c(16, 24),
|
||||
legend.title = "Dose",
|
||||
plot.title = "Plate Annotation - ",
|
||||
textFontSize = 9, ...
|
||||
) {
|
||||
WellID <- Label <- NULL
|
||||
|
||||
if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
|
||||
stop(" 'WellID' and 'SampleName' column required in plateData object")
|
||||
}
|
||||
|
||||
#Check WellID Format
|
||||
checkWellID <- grepl("^[[:upper:]]{1,2}[[:digit:]]{1,2}$", plateData$WellID)
|
||||
if(!all(checkWellID)){
|
||||
stop("WellID does not have the correct format")
|
||||
}
|
||||
|
||||
|
||||
plateData[, WellID := paste0(
|
||||
sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
sprintf(
|
||||
"%02d", as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
)
|
||||
)]
|
||||
|
||||
plateData <- platetools::fill_plate(plateData, "WellID", plate = layout[1]*layout[2])
|
||||
|
||||
plateData$column <- factor(
|
||||
sprintf(
|
||||
"%02d",
|
||||
as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
),
|
||||
levels = sprintf("%02d", seq(1, layout[2]))
|
||||
)
|
||||
plateData$row <- factor(sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
levels = LETTERS[seq(1, layout[1])])
|
||||
|
||||
if (!is.null(valueVariable)){
|
||||
plateData[, values := as.character(plateData[, ..valueVariable][[1]])]
|
||||
valueVar <- "values"
|
||||
}else{
|
||||
plateData[, values := "grey"]
|
||||
valueVar <- "values"
|
||||
colours <- setNames("grey", "grey")
|
||||
}
|
||||
|
||||
|
||||
if (is.null(colours)) {
|
||||
|
||||
blues <- colorRampPalette(c("#d6e0ff", "#2171B5"))
|
||||
greens <- colorRampPalette(c("light green", "dark green"))
|
||||
|
||||
numLevels <- sort(as.numeric(as.character(unique(plateData[, values])[
|
||||
grepl(
|
||||
"^[[:digit:]]+([.][[:digit:]]+)?$",
|
||||
trimws(unique(plateData[, values]))
|
||||
)
|
||||
])))
|
||||
otherLevels <- sort(as.character(unique(plateData[, values])[
|
||||
!grepl(
|
||||
"^[[:digit:]]+([.][[:digit:]]+)?$",
|
||||
trimws(unique(plateData[,values]))
|
||||
)
|
||||
]))
|
||||
|
||||
colours <- c(blues(length(numLevels)), greens(length(otherLevels)), "red")
|
||||
names(colours) <- c(numLevels, otherLevels, "failed")
|
||||
}
|
||||
|
||||
if (!is.null(textVariable)) {
|
||||
plateData[,
|
||||
Label := do.call(paste, c(.SD, sep = "\n ")),
|
||||
.SDcols = textVariable
|
||||
]
|
||||
plateData[, Label := gsub("-", "-\n", Label)]
|
||||
plateData[, Label := gsub("_", "_\n", Label)]
|
||||
textVar <- "Label"
|
||||
} else {
|
||||
textVar <- NULL
|
||||
}
|
||||
|
||||
|
||||
if (is.null(breaks)){
|
||||
breaks <- seq_len(length(colours))
|
||||
}
|
||||
|
||||
plateLayout(
|
||||
plateData = plateData, valueVariable = valueVar,
|
||||
textVariable = textVar, plateName = plateName,
|
||||
breaks = breaks, colourWellText = colourWellText,
|
||||
legend.title = legend.title, layout = layout,
|
||||
colours = colours, plot.title = plot.title,
|
||||
textFontSize = textFontSize, ...
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#' Create a heatmap of values in a plateLayout view. The values can be
|
||||
#' library sizes, number of genes, qcScore (0/1) or a factor.
|
||||
#' @param plateData A data.table of the values to be visualized with
|
||||
#' at least the column of interest (specified in 'varOfInterest')
|
||||
#' and a 'WellID' column indicating the wells in the plate. The WellID
|
||||
#' is a combination of a letter (row in the plate) and an integer
|
||||
#' (column in the plate).
|
||||
#' @param valueVariable The name of the variable in 'plateData'
|
||||
#' to be visualized in a plate layout
|
||||
#' @param textVariable The name of the variable in 'plateData'
|
||||
#' to be shown in the wells of the plate layout. Defaults to the
|
||||
#' valueVariable and if NULL, no text will be displayed.
|
||||
#' @param breaks Numeric vector indicating breaks for plot coloring.
|
||||
#' @param colours Colours to be used for levels specified by
|
||||
#' the breaks. If NULL, a colour scheme of purples is shown.
|
||||
#' @param colourWellText Colour to display the text in the wells.
|
||||
#' @param layout Integer vector of length two with number of rows
|
||||
#' and colums in a plate, e.g. \code{c(16,24)}
|
||||
#' @param makeContourColours Logical, whether or not the plate
|
||||
#' layout will contain a contour colours for the wells based on the
|
||||
#' parameters in 'contourColours' and 'categories'
|
||||
#' @param contourVariable The variable used for the contour colouring
|
||||
#' @param contourColours Character vector specifying a colour for
|
||||
#' each range in 'categories'
|
||||
#' @param labelsCategories Character vector specifying the names
|
||||
#' (labels) for each range in 'categories'
|
||||
#' @param categories if contour Variable is not a factor, a numeric
|
||||
#' vector specifying the categories to divide the 'varOfInterest',
|
||||
#' including the lower and upper limits.
|
||||
#' @param plateName The plate name
|
||||
#' @param plot.title A title for the plot, will be contracted with
|
||||
#' the plate name
|
||||
#' @param legend.title A title for the legend
|
||||
#' @param displayHeatmap Logical, whether to display the plateLayout heatmap
|
||||
#' @param saveHeatmap Logical, whether to save the plateLayout heatmap
|
||||
#' @param outputDir The directory where the plateLayout heatmap should be saved
|
||||
#' @param prefix The prefix to the file name of the saved plateLayout heatmap
|
||||
#' @param ... additional arguments for \code{ComplexHeatmap::Heatmap} function
|
||||
#' @importFrom platetools fill_plate
|
||||
#' @importFrom RColorBrewer brewer.pal
|
||||
#' @importFrom ComplexHeatmap Heatmap
|
||||
#' @importFrom circlize colorRamp2
|
||||
#' @importFrom grid grid.text grid.rect gpar legendGrob gpar
|
||||
#' @importFrom grDevices dev.off png
|
||||
#' @importFrom graphics title
|
||||
#' @export
|
||||
plateLayout <- function(
|
||||
plateData, valueVariable, textVariable = valueVariable,
|
||||
breaks = NULL, colours = NULL, colourWellText = "white", textFontSize = 6,
|
||||
layout = c(16, 24), makeContourColours = FALSE, contourVariable = character(),
|
||||
contourColours = c("red", "orange", "seagreen3"),lwdContours = c(1, 1, 1),
|
||||
labelsCategories = c('1', '2', '3'), categories = NULL, plateName = character(),
|
||||
plot.title = character(), legend.title = NULL, legendFontSize = 15,
|
||||
row_split = rep("A", 16), col_split = rep("A", 24), legendFontSizeTitle = 15,
|
||||
displayHeatmap = TRUE, saveHeatmap = FALSE, outputDir = ".", prefix = ""
|
||||
) {
|
||||
WellID <- NULL
|
||||
if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
|
||||
stop(" 'WellID' and 'SampleName' column required in plateData object")
|
||||
}
|
||||
|
||||
|
||||
plateData[, WellID := paste0(
|
||||
sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
sprintf(
|
||||
"%02d",
|
||||
as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
|
||||
)
|
||||
)]
|
||||
|
||||
plateData <- platetools::fill_plate(plateData, "WellID", plate = 384)
|
||||
|
||||
plateData$column <- factor(
|
||||
sprintf("%02d", as.numeric(
|
||||
sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID)
|
||||
)),
|
||||
levels = sprintf("%02d", seq(1, layout[2]))
|
||||
)
|
||||
plateData$row <- factor(sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
|
||||
levels = LETTERS[seq(1, layout[1])])
|
||||
|
||||
|
||||
plateValues <- plateLayoutFormat(
|
||||
plateData,
|
||||
varOfInterest = valueVariable,
|
||||
rows = layout[1],
|
||||
cols = layout[2]
|
||||
)
|
||||
if (!is.null(textVariable)) {
|
||||
plateText <- plateLayoutFormat(
|
||||
plateData, varOfInterest = textVariable,
|
||||
rows = layout[1],
|
||||
cols = layout[2]
|
||||
)
|
||||
}
|
||||
plot.title <- gsub(
|
||||
"^([a-z])", "\\U\\1",
|
||||
gsub("([A-Z])", " \\1",
|
||||
plot.title, perl = TRUE), perl = TRUE
|
||||
)
|
||||
mainTitle <- paste0(plot.title, plateName)
|
||||
plateContourColours <- matrix("", nrow = layout[1], ncol = layout[2])
|
||||
|
||||
if (makeContourColours) {
|
||||
contourData <- plateData[WellType %in% c("nonEmpty", "Treated Wells"), ]
|
||||
|
||||
if (is.numeric(contourData[, ..contourVariable][[1]])) {
|
||||
contourData$contours <- cut(
|
||||
contourData[, ..contourVariable][[1]],
|
||||
categories, left = TRUE,
|
||||
right = TRUE,
|
||||
labels = labelsCategories)
|
||||
}
|
||||
else {
|
||||
contourData$contours <- contourData[, ..contourVariable][[1]]
|
||||
}
|
||||
names(contourColours) <- labelsCategories
|
||||
names(lwdContours) <- labelsCategories
|
||||
for (i in seq_len(layout[1])) {
|
||||
for (j in seq_len(layout[2])) {
|
||||
tryCatch({
|
||||
sampleHit <- which(
|
||||
as.character(contourData$WellID) == paste0(
|
||||
LETTERS[i], sprintf("%02d", j)
|
||||
)
|
||||
)
|
||||
if (length(sampleHit) == 1) {
|
||||
plateContourColours[i, j] <- as.character(
|
||||
contourData[sampleHit,'contours'][[1]]
|
||||
)
|
||||
}
|
||||
},
|
||||
error = function(e) {
|
||||
print(paste0(LETTERS[i], sprintf("%02d", j), " is missing."))
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plateValues$contours <- plateContourColours
|
||||
colnames(plateValues$values) <- seq_len(ncol(plateValues$values))
|
||||
|
||||
if (is.null(breaks)) {
|
||||
breakValues <- plateValues$values
|
||||
breakValues[which(is.na(breakValues))] <- 0
|
||||
if (all(breakValues >= 0)) {
|
||||
breaks <- computeBreaks(7, max(plateValues$values, na.rm = TRUE))
|
||||
} else {
|
||||
breaks <- quantile(plateValues$values, probs = seq(0, 1, 0.125))
|
||||
}
|
||||
}
|
||||
|
||||
if (is.null(colours)) {
|
||||
colours <- tryCatch({
|
||||
colorRamp2(
|
||||
breaks = breaks,
|
||||
colors = brewer.pal(length(breaks), "Purples")
|
||||
)
|
||||
},
|
||||
error = function(cond) {
|
||||
return(c("#9370DB", "white"))
|
||||
})
|
||||
}
|
||||
ht <- Heatmap(
|
||||
plateValues$values,
|
||||
column_title = mainTitle, column_title_side = "top",
|
||||
rect_gp = gpar(lwd = 0.4),
|
||||
cluster_rows = FALSE, cluster_columns = FALSE,
|
||||
col = colours, row_title = NULL,
|
||||
row_split = row_split, column_split = col_split,
|
||||
row_names_side = "left",
|
||||
cluster_row_slices = FALSE,
|
||||
cluster_column_slices = FALSE,
|
||||
show_heatmap_legend = TRUE,
|
||||
heatmap_legend_param = list(
|
||||
title = ifelse(
|
||||
is.null(legend.title),
|
||||
paste0(valueVariable, "\n"),
|
||||
paste0(legend.title, "\n")
|
||||
),
|
||||
grid_height = unit(9, "mm"), border = "black",
|
||||
labels_gp = gpar(fontsize = legendFontSize),
|
||||
title_gp = gpar(fontsize = legendFontSizeTitle)
|
||||
),
|
||||
cell_fun = function(j, i, x, y, width, height, fill) {
|
||||
if (is.na(plateValues$values[i, j])) {
|
||||
grid.rect(
|
||||
x, y, width, height,
|
||||
gp = gpar(fill = "white", alpha = 0.7, lwd = 0.7, col = "white")
|
||||
)
|
||||
}
|
||||
else if (!is.null(textVariable)) {
|
||||
grid.text(
|
||||
plateText$values[i, j], x, y,
|
||||
just = "centre",
|
||||
gp = gpar(fontsize = textFontSize, col = colourWellText)
|
||||
)
|
||||
}
|
||||
if (makeContourColours) {
|
||||
if (!is.na(plateValues$contours[i, j])) {
|
||||
grid.rect(
|
||||
x, y, width, height,
|
||||
gp = gpar(
|
||||
col = contourColours[as.character(plateValues$contours[i, j])],
|
||||
fill = NA,
|
||||
lwd = lwdContours[as.character(plateValues$contours[i, j])]
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
if (displayHeatmap) {
|
||||
print(ht)
|
||||
}
|
||||
if (saveHeatmap) {
|
||||
png(
|
||||
file.path(
|
||||
outputDir,
|
||||
paste0(prefix,gsub(" |-", "",plot.title), "_", plateName, ".png")
|
||||
),
|
||||
width = 30, height = 10, units = "cm", res = 1200
|
||||
)
|
||||
print(ht)
|
||||
dev.off()
|
||||
}
|
||||
|
||||
return(ht)
|
||||
}
|
||||
|
||||
|
||||
#' Return numerical matrix with number of reads that corresponds to the
|
||||
#' plate layout
|
||||
#' @param data A data.frame of the values to be visualized with at least
|
||||
#' the columnof interest (specified in 'varOfInterest') and a 'WellID' column
|
||||
#' indicating the wells in the plate. The WellID is a combination of a
|
||||
#' letter (row in the plate) and an integer (column in the plate).
|
||||
#' @param varOfInterest The name of the variable in 'data' to be visualized
|
||||
#' in a plate layout
|
||||
#' @param rows number of rows in a plate layout
|
||||
#' @param cols number of columns in a plate layout
|
||||
#' @param verbose if \code{TRUE}, samples missing from the plate
|
||||
#' will be reported
|
||||
#' @export
|
||||
plateLayoutFormat <- function(
|
||||
data, varOfInterest,
|
||||
rows = 16, cols = 24,
|
||||
verbose = FALSE
|
||||
) {
|
||||
plateValues <- matrix(NA, nrow = rows, ncol = cols)
|
||||
for (i in seq_len(rows)) {
|
||||
for (j in seq_len(cols)) {
|
||||
tryCatch({
|
||||
sampleHit <- which(
|
||||
as.character(data$WellID) == paste0(LETTERS[i], sprintf("%02d", j))
|
||||
)
|
||||
if(length(sampleHit) == 1){
|
||||
plateValues[i, j] <- data[sampleHit, ..varOfInterest][[1]]
|
||||
}
|
||||
},
|
||||
error = function(e) {
|
||||
if (verbose == TRUE) {
|
||||
print(paste0(LETTERS[i], sprintf("%02d", j), " is missing."))
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
row.names(plateValues) <- LETTERS[1:rows]
|
||||
return(list("values" = plateValues))
|
||||
}
|
||||
|
||||
|
||||
|
||||
#' Helper function to automate break selection for raw count data
|
||||
#'
|
||||
#' This function creates an exponentially increasing vector for given number
|
||||
#' breaks between zero and some element of choice. It is particularly useful for
|
||||
#' raw counts or raw counts per million.
|
||||
#'
|
||||
#' @param nBreaks Number of breaks to be generated
|
||||
#' @param maxElement Maximum value of data entries
|
||||
#' @export
|
||||
computeBreaks <- function(nBreaks, variable) {
|
||||
|
||||
maxElement <- max(variable, na.rm = TRUE)
|
||||
if (length(unique(variable)) == 1) {
|
||||
breaks <- c(0, 0.5, ifelse(maxElement < 1, 1, maxElement))
|
||||
} else {
|
||||
coefSystem <- solve(
|
||||
rbind(c(1, 1), c(1, (nBreaks - 1)))) %*% c(0, log(maxElement)
|
||||
)
|
||||
coefExp <- c(exp(coefSystem[1]), coefSystem[2])
|
||||
breaks <- coefExp[1] * exp((1:(nBreaks - 1)) * coefExp[2])
|
||||
}
|
||||
return(c(0, breaks))
|
||||
}
|
||||
977
target/executable/report/create_report/template.Rmd
Executable file
977
target/executable/report/create_report/template.Rmd
Executable file
@@ -0,0 +1,977 @@
|
||||
---
|
||||
title: "Exploratory Data Report"
|
||||
date: "`r format(Sys.time(), '%d %B, %Y')`"
|
||||
editor_options:
|
||||
chunk_output_type: console
|
||||
output:
|
||||
oaStyle::html_report
|
||||
# parameters which are overwritten by the script
|
||||
params:
|
||||
outputDir: 'output/'
|
||||
esets:
|
||||
- sample1.rds
|
||||
- sample2.rds
|
||||
---
|
||||
|
||||
<!---
|
||||
Copy this template in your working directory (where you want to run the report).
|
||||
This template can be used as a starting document to run a preliminary DRUGseq report
|
||||
-->
|
||||
|
||||
<!---
|
||||
Use full page width
|
||||
-->
|
||||
|
||||
<style type="text/css">
|
||||
div.main-container {
|
||||
max-width: 1600px !important;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
|
||||
```{r params, eval = TRUE, include = FALSE}
|
||||
outputDir <- params$outputDir
|
||||
esets <- params$esets
|
||||
```
|
||||
|
||||
|
||||
```{r outputDir, echo = FALSE}
|
||||
## Required: ABSOLUTE outputDir
|
||||
outputDir <- file.path(outputDir)
|
||||
|
||||
# When working on a windows computer it should be
|
||||
# "/Users/..." instead of "C:/Users/..."
|
||||
if (.Platform$OS.type == "windows") {
|
||||
outputDir <- paste0(
|
||||
"/",
|
||||
paste(
|
||||
unlist(strsplit(outputDir, split = "/"))[-1], collapse = "/"
|
||||
),
|
||||
"/"
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```{r optionsChunkDoNotModify, echo = FALSE, message = FALSE, warning=FALSE}
|
||||
|
||||
## Chunk with options for knitr. This chunk should not be modified.
|
||||
knitr::opts_chunk$set(
|
||||
eval = TRUE,
|
||||
echo = FALSE,
|
||||
message = FALSE,
|
||||
cache = FALSE,
|
||||
warning = FALSE,
|
||||
error = FALSE,
|
||||
comment = NA, #"#",
|
||||
tidy = FALSE,
|
||||
collapse = TRUE,
|
||||
out.width = "100%",
|
||||
fig.width = 20,
|
||||
fig.height = 10,
|
||||
results = "asis")
|
||||
|
||||
knitr::opts_knit$set(root.dir = getwd())
|
||||
|
||||
options(warn = 1, width = 200)
|
||||
|
||||
```
|
||||
|
||||
```{r libraries_and_functions}
|
||||
source("plateLayouts.R")
|
||||
library(ComplexHeatmap)
|
||||
library(data.table)
|
||||
library(ggplot2)
|
||||
library(knitr)
|
||||
library(Biobase)
|
||||
library(gridExtra)
|
||||
library(RColorBrewer)
|
||||
```
|
||||
|
||||
|
||||
```{r dataImport}
|
||||
|
||||
# Create esetList
|
||||
esetList <- sapply(
|
||||
esets, simplify = FALSE,
|
||||
USE.NAMES = TRUE,
|
||||
function(eset_raw) {
|
||||
if (!file.exists(eset_raw)) {
|
||||
stop(paste0("Provided path '", eset_raw, "' is not a file."))
|
||||
}
|
||||
eset <- readRDS(eset_raw)
|
||||
}
|
||||
)
|
||||
pools <- sapply(esetList, function(eset) {
|
||||
unique(eset$PoolName)
|
||||
})
|
||||
names(esetList) <- unlist(pools)
|
||||
|
||||
# Create qcData
|
||||
pDataList <- lapply(esetList, function(eset) data.table(pData(eset)))
|
||||
qcData <- rbindlist(pDataList, fill = TRUE)
|
||||
|
||||
textVars <- "SampleName"
|
||||
annotationVar <- "PoolName"
|
||||
|
||||
if (!"SampleName" %in% names(qcData)) {
|
||||
qcData[, SampleName := paste0(PoolName, "_", WellBC)]
|
||||
}
|
||||
qcData[, log10LibSize := round(log10(NumberOfInputReads))]
|
||||
qcData[, (annotationVar) := lapply(.SD, as.factor), .SDcols = annotationVar]
|
||||
|
||||
|
||||
colourList <- list()
|
||||
Design_levels <- sort(
|
||||
as.character(unique(qcData[, ..annotationVar][[1]])),
|
||||
decreasing = TRUE
|
||||
)
|
||||
|
||||
if (length(Design_levels) == 1) {
|
||||
colours <- c("#d6e0ff", "lightgrey")
|
||||
names(colours) <- c(Design_levels, "Empty")
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
}else if (length(Design_levels) == 2) {
|
||||
colours <- c("#d6e0ff", "#FF9999")
|
||||
|
||||
names(colours) <- c(Design_levels)
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
} else if (length(Design_levels) <= 20) {
|
||||
|
||||
if (length(Design_levels) > 12) {
|
||||
colours <- c(
|
||||
brewer.pal(12, "Set3"),
|
||||
brewer.pal((length(Design_levels) - 12),
|
||||
"Pastel2")
|
||||
)
|
||||
} else {
|
||||
colours <- c(brewer.pal(length(Design_levels), "Set3"))
|
||||
}
|
||||
|
||||
names(colours) <- c(Design_levels)
|
||||
colourList[[annotationVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotationVar,
|
||||
"text" = textVars
|
||||
)
|
||||
} else {
|
||||
colours <- c("#d6e0ff")
|
||||
names(colours) <- c("nonEmpty")
|
||||
colourList[[annotVar]] <- list(
|
||||
"colours" = colours,
|
||||
"annotVar" = annotVar,
|
||||
"text" = annotVar
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
# Pool Description
|
||||
|
||||
Per pool within this study, there are several pool layout plots shown, based on the
|
||||
|
||||
* number of STAR input reads (= library size)
|
||||
|
||||
* log10 transformed number of STAR input reads
|
||||
|
||||
* number of detected UMIs
|
||||
|
||||
* number of detected genes
|
||||
|
||||
* number of chromosomal reads
|
||||
|
||||
* percentage of ERCC
|
||||
|
||||
* percentage of mitochondria
|
||||
|
||||
|
||||
> The values for the different samples within each pool is expected to be comparable if the content of the different pools is equally diverse.
|
||||
|
||||
```{r plateAnnotation, out.width = "100%",fig.width = 20, fig.height= 10}
|
||||
|
||||
plateVars <- c("NumberOfInputReads", "log10LibSize", "NumberOfMappedReads",
|
||||
"NumberOfChromReads", "NumberOfUMIs", "NumberOfGenes",
|
||||
"pctMT", "pctERCC")
|
||||
|
||||
breaksVars <- lapply(
|
||||
plateVars,
|
||||
function(var) {
|
||||
computeBreaks(7, qcData[, ..var])
|
||||
}
|
||||
)
|
||||
names(breaksVars) <- plateVars
|
||||
|
||||
for (pool in pools){
|
||||
cat("\n\n")
|
||||
cat(paste0("## ", pool, " {.tabset} \n\n"))
|
||||
poolData <- qcData[PoolName == pool]
|
||||
lapply(plateVars, function(plateVar) {
|
||||
cat("\n\n")
|
||||
cat(sprintf("### %s {.unnumbered}", plateVar))
|
||||
cat("\n\n")
|
||||
plateLayout(
|
||||
poolData, valueVariable = plateVar,
|
||||
textFontSize = 10, legendFontSize = 12,
|
||||
plateName = pool, plot.title = "libSize - ",
|
||||
legend.title = "libSize", breaks = breaksVars[[plateVar]]
|
||||
)
|
||||
cat("\n\n")
|
||||
})
|
||||
cat("\n\n")
|
||||
}
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
|
||||
# Data Distributions
|
||||
|
||||
|
||||
## Reads Distributions {.tabset}
|
||||
|
||||
The 4 box plots below represent the distributions per pool of the different samples based on:
|
||||
|
||||
* the number of STAR input reads
|
||||
|
||||
* the number of STAR mapped reads
|
||||
|
||||
* the percentage of STAR mapped reads
|
||||
|
||||
* the number of detected genes
|
||||
|
||||
> The distributions contribute to the QC metrics mentioned in Par 3. The higher these values, the better.
|
||||
> The data range for the different plates is expected to be comparable if the content of the different plates is equally diverse.
|
||||
|
||||
|
||||
### Number of Input Reads {.tabset .unnumbered}
|
||||
|
||||
```{r settings_1}
|
||||
|
||||
nColPlots = 1
|
||||
figHeight = 7
|
||||
|
||||
```
|
||||
|
||||
#### Distribution {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r boxplots_input_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(
|
||||
x = PoolName,
|
||||
y = NumberOfInputReads, colour = PoolName
|
||||
)
|
||||
) + geom_boxplot() + ylab("Number of Input Reads") +
|
||||
ggtitle("Number of Input Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
### Number of Mapped Reads {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_mapped_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
#### pct Mapped Reads {.unnumbered}
|
||||
|
||||
```{r boxplots_pctMapped_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = PctMappedReads, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
ylab("pct Mapped Reads") +
|
||||
ggtitle("pct Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Number of Chromosomal Reads {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_chrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfChromReads, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of Chromosomal Reads") +
|
||||
ggtitle("Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
#### pct Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r boxplots_pctChrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = pctChrom, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("pct Chromosomal Reads") +
|
||||
ggtitle("pct Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Number of UMIs {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r boxplots_umi_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfUMIs, colour = PoolName)
|
||||
) + geom_boxplot() + ylab("Number of UMIs") +
|
||||
ggtitle('Number of UMIs') +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
#### Density distribution {.unnumbered}
|
||||
|
||||
```{r density_numberOfUMIs}
|
||||
|
||||
## Pre-filtering data exploration
|
||||
dt_plot <- melt(
|
||||
qcData,
|
||||
id.vars = c("SampleName", "PoolName", "WellID"),
|
||||
measure.vars = c("NumberOfInputReads", "NumberOfMappedReads", "NumberOfUMIs")
|
||||
)
|
||||
|
||||
readsDensity_plot <- ggplot(dt_plot, aes(value))
|
||||
readsDensity_plot <- readsDensity_plot +
|
||||
geom_density(aes(fill = variable), alpha=0.8) +
|
||||
facet_grid(~ PoolName, scales = "free_x", space = "fixed", drop = TRUE) +
|
||||
geom_vline(
|
||||
xintercept = 5e5,
|
||||
linetype = "dashed",
|
||||
color = "steelblue3", size = 2
|
||||
) +
|
||||
annotate(
|
||||
"text",
|
||||
x = 3.5e5, y = 2e-6, label = "500k",
|
||||
angle = 90, color = "steelblue3", size = 10
|
||||
) +
|
||||
geom_vline(
|
||||
xintercept = 1.5e6, linetype = "dashed",
|
||||
color = "forestgreen", size = 2
|
||||
) +
|
||||
annotate(
|
||||
"text", x = 1.35e6, y = 2e-6, label = "1.5M",
|
||||
angle = 90, color = "forestgreen", size = 10
|
||||
) +
|
||||
labs(
|
||||
title = "Density plot",
|
||||
subtitle = paste0(
|
||||
"# Samples with NumberOfMappedReads > 1.5M: ",
|
||||
length(which(qcData$NumberOfMappedReads > 1.5e6)),
|
||||
"\n# Samples with NumberOfUMIs > 500k: ",
|
||||
length(which(qcData$NumberOfUMIs > 5e5))
|
||||
),
|
||||
caption = paste0("# Total samples (after removing empty): ", nrow(qcData)),
|
||||
x = "Count",
|
||||
fill = "Variable"
|
||||
) +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 5),
|
||||
axis.text.x = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
plot.subtitle = element_text(size = 17),
|
||||
plot.caption = element_text(size = 15),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.y = element_blank(),
|
||||
axis.ticks.y = element_blank(),
|
||||
axis.title.y = element_blank()
|
||||
)
|
||||
readsDensity_plot
|
||||
|
||||
```
|
||||
|
||||
### Number of Genes {.tabset .unnumbered}
|
||||
|
||||
#### Distribution {.unnumbered}
|
||||
|
||||
```{r boxplots_genes_plate, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = NumberOfGenes, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() + ylab("Number of Genes") +
|
||||
ggtitle("Number of Genes") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
## {.tabset .toc-ignore .unnumbered}
|
||||
|
||||
|
||||
In addition, several plots are shown visualizing the efficiency of the reads-to-genes translation:
|
||||
|
||||
* the number of input reads vs the number of mapped reads
|
||||
|
||||
* the number of chromosomal reads vs the number of mapped reads
|
||||
|
||||
* the number of mapped reads per UMI vs the number of mapped reads
|
||||
|
||||
* the number of UNI vs the number of mapped reads
|
||||
|
||||
* the number of mapped reads vs the number of genes
|
||||
|
||||
* the number of chromosomal reads vs the number of genes
|
||||
|
||||
* the number of mapped reads per UMI vs the number of genes
|
||||
|
||||
### Mapping Efficiency {.tabset .unnumbered}
|
||||
|
||||
#### Number of Input Reads {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_1_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfInputReads, y = NumberOfMappedReads, colour = PoolName)
|
||||
) +
|
||||
geom_point() +
|
||||
xlab("Number of Input Reads") +
|
||||
ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Mapped Reads vs Number of Input Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
#### Number of Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_2_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_point() +
|
||||
xlab("Number of Chromosomal Reads") + ylab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Chromosomal Reads vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
#### Number of UMI {.unnumbered}
|
||||
|
||||
```{r mapping_efficiency_4_plate, fig.height = 7}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x =NumberOfUMIs, y = NumberOfMappedReads, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Mapped Reads") + xlab("Number of UMIs ") +
|
||||
ggtitle("Number of UMIs vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
### Counting Efficiency {.tabset .unnumbered}
|
||||
|
||||
#### Number of Mapped Reads {.unnumbered}
|
||||
|
||||
```{r gene_efficiency_1_plate, fig.height = 7}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfMappedReads, y = NumberOfGenes, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Genes") + xlab("Number of Mapped Reads") +
|
||||
ggtitle("Number of Genes vs Number of Mapped Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
#### Number of Chromosomal Reads {.unnumbered}
|
||||
|
||||
```{r gene_efficiency_2_plate, fig.height = 7}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = NumberOfGenes, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ylab("Number of Genes") + xlab("Number of Chromosomal Reads") +
|
||||
ggtitle("Number of Genes vs Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Sequencing Saturation {.tabset}
|
||||
|
||||
The barplots below represent the sequencing saturation per sample as determined by STAR, split per pool.
|
||||
The HT-RNAseq platform aims for shallow sequencing resulting in relatively low sequencing saturations of 10-20%.
|
||||
In addition, the sequencing saturation vs the number of input reads is shown.
|
||||
|
||||
### Sequencing Saturation {.unnumbered}
|
||||
|
||||
|
||||
|
||||
```{r sequencingSaturation, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = WellID, y = SequencingSaturation, fill = PoolName)
|
||||
) + geom_bar(stat = "identity", position = "dodge") +
|
||||
xlab("Samples") + ggtitle("Sequencing Saturation per Sample") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(1, "lines"),
|
||||
text = element_text(size = 10),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.text.y = element_text(size = 15),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### Sequencing Saturation - Input Reads {.unnumbered}
|
||||
|
||||
|
||||
```{r sequencingSaturation_inputReads, fig.height = figHeight}
|
||||
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfInputReads, y = SequencingSaturation, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ggtitle("Sequencing Saturation vs Number of Input Reads") +
|
||||
theme(strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
### Sequencing Saturation - Mapped Reads {.unnumbered}
|
||||
|
||||
```{r sequencingSaturation_mappedReads, fig.height = figHeight}
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = NumberOfChromReads, y = SequencingSaturation, colour = PoolName)
|
||||
) + geom_point() +
|
||||
ggtitle("Sequencing Saturation vs Number of Chromosomal Reads") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size=10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size=18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
```
|
||||
|
||||
<br>
|
||||
|
||||
## Genomic Origin {.tabset}
|
||||
|
||||
The 3 boxplots below represent, per pool, the distributions of the percentage of reads mapping to:
|
||||
|
||||
* chromosomal regions
|
||||
|
||||
* mitochondrial regions
|
||||
|
||||
* ERCC spike-ins
|
||||
|
||||
The 4th plot summarises the above results across samples per pool.
|
||||
|
||||
The 5th plot shows the percentage of reads mapped to the transcriptome (as counted by STAR). This measurement serves as a proxy for the percentage of reads mapped to exons.
|
||||
|
||||
> The percentage ERCC contributes to the QC metrics mentioned in Par 3. This value is ideally as low as possible (but non-zero to ensure the they have been spiked in) and comparable for the different pools.
|
||||
|
||||
|
||||
|
||||
|
||||
### pctChrom {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r genomicOrigin_chrom_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData, aes(x = PoolName, y = pctChrom, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() +
|
||||
ggtitle("pctChrom") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
### pctMT {.tabset .unnumbered}
|
||||
|
||||
```{r genomicOrigin_mt_plate, fig.height = figHeight}
|
||||
|
||||
ggplot(
|
||||
qcData,
|
||||
aes(x = PoolName, y = pctMT, colour = PoolName)
|
||||
) +
|
||||
geom_boxplot() + ggtitle("pctMT") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
### pctERCC {.tabset .unnumbered}
|
||||
|
||||
|
||||
```{r genomicOrigin_ercc_plate, fig.height = figHeight}
|
||||
ggplot(qcData, aes(x = PoolName, y = pctERCC, colour = PoolName)) +
|
||||
geom_boxplot() +
|
||||
ggtitle("pctERCC") +
|
||||
theme(
|
||||
strip.text.x = element_text(size = 20),
|
||||
panel.spacing = unit(2, "lines"),
|
||||
text = element_text(size = 10),
|
||||
axis.text.y = element_text(angle = 90, size = 14),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title.y = element_text(size = 15),
|
||||
axis.text.x = element_blank(),
|
||||
axis.ticks.x = element_blank()
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
### Genomic Summary {.tabset .unnumbered}
|
||||
|
||||
|
||||
|
||||
```{r genomicOrigin_summary_plate}
|
||||
meanPctChromMTData <- qcData[, .(
|
||||
"pctChrom" = median(pctChrom),
|
||||
"pctMT" = median(pctMT),
|
||||
"pctERCC" = median(pctERCC)
|
||||
), by = PoolName]
|
||||
meanPctChromMTDataLong <- melt(
|
||||
meanPctChromMTData,
|
||||
id.vars = "PoolName",
|
||||
measure.vars = c("pctChrom", "pctMT", "pctERCC"),
|
||||
variable.name = "Origin", value.name = "pct"
|
||||
)
|
||||
ggplot(
|
||||
meanPctChromMTDataLong,
|
||||
aes(fill = Origin, y = pct, x = PoolName)) +
|
||||
geom_bar(position = "stack", stat = "identity") +
|
||||
ggtitle("Genomic Origin") +
|
||||
theme(
|
||||
text = element_text(size = 10),
|
||||
axis.text = element_text(angle = 90, size = 15),
|
||||
plot.title = element_text(size = 18),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 17),
|
||||
axis.title = element_text(size = 15)
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
# Depletion {.tabset}
|
||||
|
||||
<div align="center">
|
||||
```{r depletion}
|
||||
|
||||
|
||||
for (eset_name in pools) {
|
||||
cat("\n\n")
|
||||
cat(paste0("## ", eset_name, " {.unnumbered}"))
|
||||
cat("\n\n")
|
||||
|
||||
eset <- esetList[[eset_name]]
|
||||
average_reads <- sort(apply(exprs(eset), 1, mean), decreasing = TRUE)
|
||||
plotData <- data.table(
|
||||
ENSGID = names(average_reads),
|
||||
av_count = average_reads
|
||||
)
|
||||
|
||||
gen_descript <- data.table(
|
||||
ENSGID = eset@featureData@data$gene_id,
|
||||
Description = eset@featureData@data$GENENAME
|
||||
)
|
||||
order_gen_descript <- gen_descript[
|
||||
match(plotData$ENSGID, gen_descript$ENSGID),
|
||||
]
|
||||
|
||||
g <- ggplot(
|
||||
plotData[c(1:100)],
|
||||
aes(x = reorder(ENSGID, -av_count), y = av_count)
|
||||
) + geom_bar(stat = "identity") +
|
||||
theme(
|
||||
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),
|
||||
axis.text.y = element_text(size = 12),
|
||||
legend.text = element_text(size = 15),
|
||||
legend.title = element_text(size = 15),
|
||||
axis.title = element_text(size = 18),
|
||||
plot.title = element_text(size = 20)
|
||||
) + ylab("Average Counts") + xlab("Genes")
|
||||
|
||||
print(g)
|
||||
|
||||
cat("\n\n")
|
||||
cat("<br>")
|
||||
cat("<br>")
|
||||
|
||||
print(htmltools::tagList((DT::datatable(order_gen_descript[1:100, ]))))
|
||||
}
|
||||
```
|
||||
</div>
|
||||
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
# Glossary {.unnumbered}
|
||||
|
||||
|
||||
## Read {.unlisted .unnumbered}
|
||||
|
||||
A read is a oligonucleotide (a short RNA fragment) that has been sequenced. It consists of a fixed number of base pairs (bp) and therefore has a specific read length.
|
||||
|
||||
|
||||
|
||||
## Input Read {.unlisted .unnumbered}
|
||||
|
||||
Each read of the fastq file used as input to the STAR aligner is considered an input read.
|
||||
|
||||
|
||||
|
||||
## Read With Valid Barcode {.unlisted .unnumbered}
|
||||
|
||||
A read with a valid barcode is a read for which the barcode matches the white list of barcodes under the given restriction of the number of allowed mismatches. The number of reads with a valid barcode is lower or equal to the number of input reads.
|
||||
|
||||
|
||||
|
||||
## Mapped Read {.unlisted .unnumbered}
|
||||
|
||||
A read that has been aligned against the reference genome and for which one or more suitable matching locations have been found is a mapped read. Depending on the number of allowed mismatches this might or might not be be an exact match. The number of mapped reads is lower or equal to the number of reads with a valid barcode.
|
||||
|
||||
|
||||
|
||||
## Uniquely Mapped Read {.unlisted .unnumbered}
|
||||
|
||||
A read for which one and only one suitable matching location in the reference genome was found is an uniquely mapped read. The number of uniquely mapped reads is lower or equal to the number of mapped reads.
|
||||
|
||||
|
||||
|
||||
## Counted Read {.unlisted .unnumbered}
|
||||
|
||||
A mapped read will only be counted if it overlaps (1 nucleotide or more) with one and only one gene. The number of counted reads is lower or equal to the number of (uniquely) mapped reads.
|
||||
|
||||
|
||||
|
||||
## UMIs {.unlisted .unnumbered}
|
||||
|
||||
Unique molecular identifiers (UMI) are short sequences in order to uniquely tag each molecule in a sample library. Sequencing with UMIs allows bioinformatics software to filter out duplicate reads and PCR errors with a high level of accuracy and report unique reads.
|
||||
|
||||
The reported UMIs is the number of UMIs among the set of reads that map to an unique gene, i.e the number of reads is deduplicated.
|
||||
|
||||
|
||||
|
||||
## pctERCC {.unlisted .unnumbered}
|
||||
|
||||
The percentage of reads mapping to the ERCC genes among the total number of **mapped** reads.
|
||||
|
||||
|
||||
|
||||
## pctMT {.unlisted .unnumbered}
|
||||
|
||||
The percentage of reads mapping to the MT genes among the total number of **mapped** reads.
|
||||
|
||||
|
||||
|
||||
## Sequencing Saturation {.unlisted .unnumbered}
|
||||
|
||||
The sequencing saturation is a measure of the fraction of library complexity. The inverse of one minus the sequencing saturation can be interpreted as the number of additional reads it would take to detect a new transcript. Consequently, a low sequencing saturation indicates a shallow sequencing in which a new transcript could be discovered with a few reads.
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
<center>
|
||||

|
||||
</center>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
232
target/executable/stats/combine_star_logs/.config.vsh.yaml
Normal file
232
target/executable/stats/combine_star_logs/.config.vsh.yaml
Normal file
@@ -0,0 +1,232 @@
|
||||
name: "combine_star_logs"
|
||||
namespace: "stats"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--barcodes"
|
||||
description: "Barcodes responding to the respective log files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--star_logs"
|
||||
description: "Paths to the STAR log files (most frequently called Log.final.out)\n"
|
||||
info: null
|
||||
example:
|
||||
- "Log.final.out"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--gene_summary_logs"
|
||||
description: "Paths to the Summary.csv files from the STAR Solo output. Can be\
|
||||
\ found in\nthe 'Solo.out/Gene' folder relative to the root of the STAR output\
|
||||
\ directory. \n"
|
||||
info: null
|
||||
example:
|
||||
- "Summary.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--reads_per_gene_logs"
|
||||
description: "Paths to the 'ReadsPerGene.out.tab' files as output by STAR.\n"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Tab-delimited file describing for each barcode (as the rows), the\
|
||||
\ metrics (as columns)\ngathered from the different input files. \n"
|
||||
info: null
|
||||
default:
|
||||
- "starLogs.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/combine_star_logs/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/combine_star_logs"
|
||||
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1486
target/executable/stats/combine_star_logs/combine_star_logs
Executable file
1486
target/executable/stats/combine_star_logs/combine_star_logs
Executable file
File diff suppressed because it is too large
Load Diff
108
target/executable/stats/combine_star_logs/nextflow_labels.config
Normal file
108
target/executable/stats/combine_star_logs/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,216 @@
|
||||
name: "generate_pool_statistics"
|
||||
namespace: "stats"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile_well1.tsv"
|
||||
- "processedBamfile_well2.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChromPool"
|
||||
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
|
||||
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
|
||||
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
|
||||
\ ERCC and mitochondrial\nreads.\n"
|
||||
info: null
|
||||
example:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_pool_statistics"
|
||||
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1205
target/executable/stats/generate_pool_statistics/generate_pool_statistics
Executable file
1205
target/executable/stats/generate_pool_statistics/generate_pool_statistics
Executable file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,296 @@
|
||||
name: "generate_well_statistics"
|
||||
namespace: "stats"
|
||||
version: "v0.3.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
description: "The .bam file as returned by the mapping tool STAR."
|
||||
info: null
|
||||
example:
|
||||
- "input.bam"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--barcode"
|
||||
description: "The barcode for the well that is being processed. Is only used to\
|
||||
\ add a metadata\ncolumn to all output files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--well_id"
|
||||
description: "ID of this well. Only used to add a metadata column to the output\
|
||||
\ files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--processedBAMFile"
|
||||
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
|
||||
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromsome\
|
||||
\ to which the read was mapped to.\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrUMIsPerCB"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrUMIsPerCB.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--umiFreqTop"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per UMI (column UB) the frequency at which they occur in the reads (column\n\
|
||||
N). Only the top 100 UMIs are included.\n"
|
||||
info: null
|
||||
default:
|
||||
- "umiFreqTop100.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--threads"
|
||||
description: "Number of threads to use for decompressing BAM files.\n"
|
||||
info: null
|
||||
default:
|
||||
- 1
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Generate summary statistics from BAM files generated by STAR solo."
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test.sam"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v0.3.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "docker"
|
||||
env:
|
||||
- "PIP_BREAK_SYSTEM_PACKAGES=1"
|
||||
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
|
||||
- "HTSLIB_INCLUDE_DIR=/usr/include/"
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "python3"
|
||||
- "python3-pip"
|
||||
- "python3-venv"
|
||||
- "python-is-python3"
|
||||
- "libhts-dev"
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pysam"
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_well_statistics/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_well_statistics"
|
||||
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
|
||||
git_remote: "https://x-access-token:ghs_7LDB6nHZEjlpesiFpOviJpZtnArMPy0dy3S4@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "v0.3.0"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1344
target/executable/stats/generate_well_statistics/generate_well_statistics
Executable file
1344
target/executable/stats/generate_well_statistics/generate_well_statistics
Executable file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user