Build branch v0.3 with version v0.3.0 (bc7a95f)

Build pipeline: viash-hub.htrnaseq.v0.3-bxj5m

Source commit: bc7a95f98f

Source message: Bump version to v0.3.0 (2)
This commit is contained in:
CI
2025-01-17 17:02:55 +00:00
commit f1b001e1da
228 changed files with 117817 additions and 0 deletions

View File

@@ -0,0 +1,232 @@
name: "create_eset"
namespace: "eset"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "author"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--pDataFile"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fDataFile"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--mappingDir"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--poolName"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output"
info: null
default:
- "eset.$id.rds"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "r_script"
path: "script.R"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "r_script"
path: "test.R"
is_executable: true
- type: "file"
path: "pData.tsv"
- type: "file"
path: "fData.tsv"
- type: "file"
path: "mapping_dir"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "rocker/r2u:24.04"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "r"
cran:
- "data.table"
- "nlcv"
bioc:
- "Seurat"
bioc_force_install: false
test_setup:
- type: "r"
cran:
- "testthat"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded for this component.
build_info:
config: "src/eset/create_eset/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/eset/create_eset"
executable: "target/executable/eset/create_eset/create_eset"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
# Remote URL is stored without userinfo: access tokens must never be written
# into published build metadata. Strip credentials in CI before building.
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor tuning. Settings inside `$k8s` use Nextflow's `$<executor-name>`
// scope, so they apply to the Kubernetes executor only.
executor {
$k8s {
// Upper bound on the job submission rate.
// NOTE(review): Nextflow reads '10sec' as a rate (10 submissions per second),
// not as a 10-second delay — confirm this is the intent.
submitRateLimit = '10sec'
// Interval between checks for task completion.
pollInterval = '1 sec'
}
}
// Default process settings plus the resource labels (`withLabel`) that
// components can opt into.
process {
// Container used when a process does not specify its own.
container = 'nextflow/bash:latest'
// default resources
// Memory grows linearly with the attempt number (8, 16, 24, ... GB).
// Note: this default does not go through get_memory(), so it is not capped
// by maxMemory below.
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 trigger a retry; any other failure terminates the run.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() (defined at the end of this file) when
// computing the memory of the *mem labels.
maxMemory = 192.GB
// Resource labels
// CPU labels are fixed values.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels scale with the attempt number and are passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles defined by this file:
// mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Uses the first environment variable below that is set and non-empty,
// falling back to '/tmp'; the result is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Point the temp setting of each listed container engine at the detected directory.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Disable publishDir for every process (the name pattern '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Enable Docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
// Run at most one instance at a time of processes whose name matches this pattern.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Lower memory ceiling for get_memory() than the top-level process scope sets.
maxMemory = 25.GB
// Smaller CPU and memory labels than the top-level process scope.
// No `verylowmem` entry is defined in this profile.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Clamp a requested memory amount to the ceiling configured in `process.maxMemory`.
//
// Called from the `withLabel` memory closures in this file.
//
// Parameters:
//   to_compare - requested memory; must support `compareTo` against a
//                `nextflow.util.MemoryUnit` (labels pass e.g. `8.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is absent/falsy or when the
//     request does not exceed it;
//   - `process.maxMemory` when `process.maxRetries` is set and `task.attempt`
//     equals it (the final attempt always gets the ceiling);
//   - `process.maxMemory` (as a MemoryUnit) when the request exceeds the ceiling.
// Any exception raised while evaluating the limits prints a diagnostic and
// terminates the JVM with exit status 1.
//
// NOTE(review): `task` is not a parameter of this function; it is expected to
// resolve from the calling context — confirm.
def get_memory(to_compare) {
  // No ceiling configured: pass the request through untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Only the sign of compareTo is guaranteed, so test `> 0` rather than `== 1`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. The ceiling lives in
      // `process.maxMemory`; there is no `max_memory` variable in this config.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,211 @@
name: "create_fdata"
namespace: "eset"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--gtf"
description: "Genome annotation file in GTF format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Tab-delimited text file containing information about the 'gene'\
\ or 'transcript'\nentries from the input GTF file. The 'transcript' entries\
\ are used in case the source\nof the GTF was 'refGene' or 'ncbiRefSeq'. \n"
info: null
default:
- "fData.$id.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "create_fdata.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Create a fdata file\n"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test_annotation.gtf"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded for this component.
build_info:
config: "src/eset/create_fdata/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/eset/create_fdata"
executable: "target/executable/eset/create_fdata/create_fdata"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
# Remote URL is stored without userinfo: access tokens must never be written
# into published build metadata. Strip credentials in CI before building.
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor tuning. Settings inside `$k8s` use Nextflow's `$<executor-name>`
// scope, so they apply to the Kubernetes executor only.
executor {
$k8s {
// Upper bound on the job submission rate.
// NOTE(review): Nextflow reads '10sec' as a rate (10 submissions per second),
// not as a 10-second delay — confirm this is the intent.
submitRateLimit = '10sec'
// Interval between checks for task completion.
pollInterval = '1 sec'
}
}
// Default process settings plus the resource labels (`withLabel`) that
// components can opt into.
process {
// Container used when a process does not specify its own.
container = 'nextflow/bash:latest'
// default resources
// Memory grows linearly with the attempt number (8, 16, 24, ... GB).
// Note: this default does not go through get_memory(), so it is not capped
// by maxMemory below.
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 trigger a retry; any other failure terminates the run.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() (defined at the end of this file) when
// computing the memory of the *mem labels.
maxMemory = 192.GB
// Resource labels
// CPU labels are fixed values.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels scale with the attempt number and are passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles defined by this file:
// mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Uses the first environment variable below that is set and non-empty,
// falling back to '/tmp'; the result is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Point the temp setting of each listed container engine at the detected directory.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Disable publishDir for every process (the name pattern '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Enable Docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
// Run at most one instance at a time of processes whose name matches this pattern.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Lower memory ceiling for get_memory() than the top-level process scope sets.
maxMemory = 25.GB
// Smaller CPU and memory labels than the top-level process scope.
// No `verylowmem` entry is defined in this profile.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Clamp a requested memory amount to the ceiling configured in `process.maxMemory`.
//
// Called from the `withLabel` memory closures in this file.
//
// Parameters:
//   to_compare - requested memory; must support `compareTo` against a
//                `nextflow.util.MemoryUnit` (labels pass e.g. `8.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is absent/falsy or when the
//     request does not exceed it;
//   - `process.maxMemory` when `process.maxRetries` is set and `task.attempt`
//     equals it (the final attempt always gets the ceiling);
//   - `process.maxMemory` (as a MemoryUnit) when the request exceeds the ceiling.
// Any exception raised while evaluating the limits prints a diagnostic and
// terminates the JVM with exit status 1.
//
// NOTE(review): `task` is not a parameter of this function; it is expected to
// resolve from the calling context — confirm.
def get_memory(to_compare) {
  // No ceiling configured: pass the request through untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Only the sign of compareTo is guaranteed, so test `> 0` rather than `== 1`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. The ceiling lives in
      // `process.maxMemory`; there is no `max_memory` variable in this config.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,225 @@
name: "create_pdata"
namespace: "eset"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--star_stats_file"
description: "Tab-delimited text file containing statistics (per column) that\
\ were generated\nfrom the STAR log files (Log.final.out, Summary.csv, ReadsPerGene.out.tab).\n\
Each entry (row) in the file describes the values for one well (barcode).\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChromPool"
description: "Pivot table in tsv format of the combined nrReadsNrGenesPerChrom\
\ files from STAR. \nDescribes per chromosome (as columns) the number of reads,\
\ as well as the total number \nof reads per cell barcode and the percentage\
\ of nuclear, ERCC and mitochondrial\nreads.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output"
info: null
default:
- "pData.$id.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "create_pdata.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Create a pdata file by combining the mapping statistics \n"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "nrReadsNrGenesPerChromPool.txt"
- type: "file"
path: "starLogs.txt"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded for this component.
build_info:
config: "src/eset/create_pdata/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/eset/create_pdata"
executable: "target/executable/eset/create_pdata/create_pdata"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
# Remote URL is stored without userinfo: access tokens must never be written
# into published build metadata. Strip credentials in CI before building.
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor tuning. Settings inside `$k8s` use Nextflow's `$<executor-name>`
// scope, so they apply to the Kubernetes executor only.
executor {
$k8s {
// Upper bound on the job submission rate.
// NOTE(review): Nextflow reads '10sec' as a rate (10 submissions per second),
// not as a 10-second delay — confirm this is the intent.
submitRateLimit = '10sec'
// Interval between checks for task completion.
pollInterval = '1 sec'
}
}
// Default process settings plus the resource labels (`withLabel`) that
// components can opt into.
process {
// Container used when a process does not specify its own.
container = 'nextflow/bash:latest'
// default resources
// Memory grows linearly with the attempt number (8, 16, 24, ... GB).
// Note: this default does not go through get_memory(), so it is not capped
// by maxMemory below.
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 trigger a retry; any other failure terminates the run.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() (defined at the end of this file) when
// computing the memory of the *mem labels.
maxMemory = 192.GB
// Resource labels
// CPU labels are fixed values.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels scale with the attempt number and are passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles defined by this file:
// mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Uses the first environment variable below that is set and non-empty,
// falling back to '/tmp'; the result is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Point the temp setting of each listed container engine at the detected directory.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Disable publishDir for every process (the name pattern '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Enable Docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
// Run at most one instance at a time of processes whose name matches this pattern.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Lower memory ceiling for get_memory() than the top-level process scope sets.
maxMemory = 25.GB
// Smaller CPU and memory labels than the top-level process scope.
// No `verylowmem` entry is defined in this profile.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Clamp a requested memory amount to the ceiling configured in `process.maxMemory`.
//
// Called from the `withLabel` memory closures in this file.
//
// Parameters:
//   to_compare - requested memory; must support `compareTo` against a
//                `nextflow.util.MemoryUnit` (labels pass e.g. `8.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is absent/falsy or when the
//     request does not exceed it;
//   - `process.maxMemory` when `process.maxRetries` is set and `task.attempt`
//     equals it (the final attempt always gets the ceiling);
//   - `process.maxMemory` (as a MemoryUnit) when the request exceeds the ceiling.
// Any exception raised while evaluating the limits prints a diagnostic and
// terminates the JVM with exit status 1.
//
// NOTE(review): `task` is not a parameter of this function; it is expected to
// resolve from the calling context — confirm.
def get_memory(to_compare) {
  // No ceiling configured: pass the request through untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Only the sign of compareTo is guaranteed, so test `> 0` rather than `== 1`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. The ceiling lives in
      // `process.maxMemory`; there is no `max_memory` variable in this config.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,182 @@
name: "check_eset"
namespace: "integration_test_components/htrnaseq"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--eset"
description: "Path to an ExpressionSet object."
info: null
example:
- "eset.rds"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--star_output"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
resources:
- type: "r_script"
path: "script.R"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "This component tests the ExpressionSet object as output by the main pipeline."
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "bioconductor/bioconductor_docker:3.19"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "r"
cran:
- "bit64"
bioc:
- "Biobase"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded for this component.
build_info:
config: "src/integration_test_components/htrnaseq/check_eset/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/integration_test_components/htrnaseq/check_eset"
executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
# Remote URL is stored without userinfo: access tokens must never be written
# into published build metadata. Strip credentials in CI before building.
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor tuning. Settings inside `$k8s` use Nextflow's `$<executor-name>`
// scope, so they apply to the Kubernetes executor only.
executor {
$k8s {
// Upper bound on the job submission rate.
// NOTE(review): Nextflow reads '10sec' as a rate (10 submissions per second),
// not as a 10-second delay — confirm this is the intent.
submitRateLimit = '10sec'
// Interval between checks for task completion.
pollInterval = '1 sec'
}
}
// Default process settings plus the resource labels (`withLabel`) that
// components can opt into.
process {
// Container used when a process does not specify its own.
container = 'nextflow/bash:latest'
// default resources
// Memory grows linearly with the attempt number (8, 16, 24, ... GB).
// Note: this default does not go through get_memory(), so it is not capped
// by maxMemory below.
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 trigger a retry; any other failure terminates the run.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() (defined at the end of this file) when
// computing the memory of the *mem labels.
maxMemory = 192.GB
// Resource labels
// CPU labels are fixed values.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels scale with the attempt number and are passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles defined by this file:
// mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Uses the first environment variable below that is set and non-empty,
// falling back to '/tmp'; the result is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Point the temp setting of each listed container engine at the detected directory.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Disable publishDir for every process (the name pattern '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Enable Docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
// Run at most one instance at a time of processes whose name matches this pattern.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Lower memory ceiling for get_memory() than the top-level process scope sets.
maxMemory = 25.GB
// Smaller CPU and memory labels than the top-level process scope.
// No `verylowmem` entry is defined in this profile.
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Clamp a requested memory amount to the ceiling configured in `process.maxMemory`.
//
// Called from the `withLabel` memory closures in this file.
//
// Parameters:
//   to_compare - requested memory; must support `compareTo` against a
//                `nextflow.util.MemoryUnit` (labels pass e.g. `8.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is absent/falsy or when the
//     request does not exceed it;
//   - `process.maxMemory` when `process.maxRetries` is set and `task.attempt`
//     equals it (the final attempt always gets the ceiling);
//   - `process.maxMemory` (as a MemoryUnit) when the request exceeds the ceiling.
// Any exception raised while evaluating the limits prints a diagnostic and
// terminates the JVM with exit status 1.
//
// NOTE(review): `task` is not a parameter of this function; it is expected to
// resolve from the calling context — confirm.
def get_memory(to_compare) {
  // No ceiling configured: pass the request through untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Only the sign of compareTo is guaranteed, so test `> 0` rather than `== 1`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. The ceiling lives in
      // `process.maxMemory`; there is no `max_memory` variable in this config.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,192 @@
name: "check_cutadapt_output"
namespace: "integration_test_components/well_demultiplexing"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--fastq_r1"
description: "Path to the forward reads to test."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--fastq_r2"
description: "Path to the reverse reads to test."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--ids"
description: "Well IDs for the corresponding fastq input"
info: null
required: true
direction: "input"
multiple: true
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "This component tests the cutadapt output from the well_demultiplex subworkflow."
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "dnaio"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/integration_test_components/well_demultiplexing/check_cutatapt_output/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output"
executable: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

View File

@@ -0,0 +1,108 @@
// Executor settings; only the Kubernetes executor scope ($k8s) is configured here.
executor {
$k8s {
// NOTE(review): the Nextflow docs describe '10sec' as 10 submissions per second — confirm this is the intended rate.
submitRateLimit = '10sec'
// Interval passed to the executor's polling setting.
pollInterval = '1 sec'
}
}
// Default process directives plus label-based cpu/memory overrides.
process {
container = 'nextflow/bash:latest'
// default resources
// The default memory is a closure that scales with task.attempt; unlike the
// memory labels below it does not go through get_memory().
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// (137..140 is an inclusive range); every other exit status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Read by get_memory() (defined further down in this config) as the ceiling
// for the memory labels.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels: base amount multiplied by the attempt number, passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles: mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Takes the first Groovy-truthy value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR
// (in that order), falling back to '/tmp', and converts it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the `temp` setting of docker, podman and charliecloud at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: set publishDir.enabled = false for every process name (selector '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller cpu/memory labels and a 25.GB process.maxMemory.
local {
// This config is for local processing.
process {
// Processes whose name matches ".*parallel_map_process" get maxForks = 1.
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Note: this profile defines no `verylowmem` override; only lowmem/midmem/highmem are set here.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Cap a requested memory amount at process.maxMemory.
//
// Called from the memory label closures, e.g. `get_memory( 8.GB * task.attempt )`.
//
// to_compare: the requested amount; it must support compareTo() against a
//             nextflow.util.MemoryUnit.
// Returns:
//   - to_compare unchanged when process.maxMemory is missing or falsy;
//   - process.maxMemory when task.attempt equals process.maxRetries;
//   - process.maxMemory (as a MemoryUnit) when the request exceeds it;
//   - to_compare otherwise.
// Any exception raised while evaluating the limits prints a diagnostic and
// exits with status 1.
def get_memory(to_compare) {
  // No ceiling configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, ask for the full ceiling.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request is larger than the ceiling (compareTo only guarantees the sign).
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Clamp to the same value that was just compared against. The name
      // `max_memory` is not defined anywhere in this config, so it must not be used here.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,177 @@
name: "publish_fastqs"
namespace: "io"
version: "v0.3.0"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
description: "R1 fastq files to publish"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "R2 fastq files to publish"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "$id"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "code.sh"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Publish the fastq files per well"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/io/publish_fastqs/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/io/publish_fastqs"
executable: "target/executable/io/publish_fastqs/publish_fastqs"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

View File

@@ -0,0 +1,108 @@
// Executor settings; only the Kubernetes executor scope ($k8s) is configured here.
executor {
$k8s {
// NOTE(review): the Nextflow docs describe '10sec' as 10 submissions per second — confirm this is the intended rate.
submitRateLimit = '10sec'
// Interval passed to the executor's polling setting.
pollInterval = '1 sec'
}
}
// Default process directives plus label-based cpu/memory overrides.
process {
container = 'nextflow/bash:latest'
// default resources
// The default memory is a closure that scales with task.attempt; unlike the
// memory labels below it does not go through get_memory().
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// (137..140 is an inclusive range); every other exit status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Read by get_memory() (defined further down in this config) as the ceiling
// for the memory labels.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels: base amount multiplied by the attempt number, passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles: mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Takes the first Groovy-truthy value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR
// (in that order), falling back to '/tmp', and converts it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the `temp` setting of docker, podman and charliecloud at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: set publishDir.enabled = false for every process name (selector '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller cpu/memory labels and a 25.GB process.maxMemory.
local {
// This config is for local processing.
process {
// Processes whose name matches ".*parallel_map_process" get maxForks = 1.
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Note: this profile defines no `verylowmem` override; only lowmem/midmem/highmem are set here.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Cap a requested memory amount at process.maxMemory.
//
// Called from the memory label closures, e.g. `get_memory( 8.GB * task.attempt )`.
//
// to_compare: the requested amount; it must support compareTo() against a
//             nextflow.util.MemoryUnit.
// Returns:
//   - to_compare unchanged when process.maxMemory is missing or falsy;
//   - process.maxMemory when task.attempt equals process.maxRetries;
//   - process.maxMemory (as a MemoryUnit) when the request exceeds it;
//   - to_compare otherwise.
// Any exception raised while evaluating the limits prints a diagnostic and
// exits with status 1.
def get_memory(to_compare) {
  // No ceiling configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, ask for the full ceiling.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request is larger than the ceiling (compareTo only guarantees the sign).
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Clamp to the same value that was just compared against. The name
      // `max_memory` is not defined anywhere in this config, so it must not be used here.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,221 @@
name: "publish_results"
namespace: "io"
version: "v0.3.0"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--star_output"
description: "Output from mapping with STAR"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChrom"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--star_qc_metrics"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--eset"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--f_data"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--p_data"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--html_report"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "$id"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "code.sh"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Publish the results"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/io/publish_results/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/io/publish_results"
executable: "target/executable/io/publish_results/publish_results"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

View File

@@ -0,0 +1,108 @@
// Executor settings; only the Kubernetes executor scope ($k8s) is configured here.
executor {
$k8s {
// NOTE(review): the Nextflow docs describe '10sec' as 10 submissions per second — confirm this is the intended rate.
submitRateLimit = '10sec'
// Interval passed to the executor's polling setting.
pollInterval = '1 sec'
}
}
// Default process directives plus label-based cpu/memory overrides.
process {
container = 'nextflow/bash:latest'
// default resources
// The default memory is a closure that scales with task.attempt; unlike the
// memory labels below it does not go through get_memory().
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// (137..140 is an inclusive range); every other exit status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Read by get_memory() (defined further down in this config) as the ceiling
// for the memory labels.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels: base amount multiplied by the attempt number, passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles: mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Takes the first Groovy-truthy value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR
// (in that order), falling back to '/tmp', and converts it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the `temp` setting of docker, podman and charliecloud at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: set publishDir.enabled = false for every process name (selector '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller cpu/memory labels and a 25.GB process.maxMemory.
local {
// This config is for local processing.
process {
// Processes whose name matches ".*parallel_map_process" get maxForks = 1.
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Note: this profile defines no `verylowmem` override; only lowmem/midmem/highmem are set here.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Cap a requested memory amount at process.maxMemory.
//
// Called from the memory label closures, e.g. `get_memory( 8.GB * task.attempt )`.
//
// to_compare: the requested amount; it must support compareTo() against a
//             nextflow.util.MemoryUnit.
// Returns:
//   - to_compare unchanged when process.maxMemory is missing or falsy;
//   - process.maxMemory when task.attempt equals process.maxRetries;
//   - process.maxMemory (as a MemoryUnit) when the request exceeds it;
//   - to_compare otherwise.
// Any exception raised while evaluating the limits prints a diagnostic and
// exits with status 1.
def get_memory(to_compare) {
  // No ceiling configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, ask for the full ceiling.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request is larger than the ceiling (compareTo only guarantees the sign).
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Clamp to the same value that was just compared against. The name
      // `max_memory` is not defined anywhere in this config, so it must not be used here.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,290 @@
name: "parallel_map"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
role: "Core Team Member"
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--genomeDir"
description: "STAR reference directory"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcodes"
description: "The barcodes/wells to process"
info: null
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Barcode arguments"
arguments:
- type: "integer"
name: "--umiLength"
description: "The length of the UMIs"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--limitBAMsortRAM"
info: null
default:
- "10000000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Runtime arguments"
arguments:
- type: "integer"
name: "--runThreadN"
description: "Number of threads to use for a single STAR execution."
info: null
default:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
description: "Location of the output folders, 1 folder per barcode. The value\
\ used\nfor this argument must contain a '*', which will be replaced with the\n\
barcode to form the final output location for that barcode.\n"
info: null
default:
- "./*"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--joblog"
description: "Where to store the log file listing all the jobs."
info: null
default:
- "execution_log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "STAR"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "wget"
- "automake"
- "make"
- "gcc"
- "g++"
- "zlib1g-dev"
- "parallel"
- "file"
interactive: false
- type: "docker"
copy:
- "STAR /usr/local/bin/$STAR_BINARY"
build_args:
- "STAR_V=2.7.6a"
env:
- "STAR_SOURCE=\"https://github.com/alexdobin/STAR/archive/refs/tags/$STAR_V.tar.gz\""
- "STAR_TARGET=\"/app/star-$STAR_V.tar.gz\""
- "STAR_INSTALL_DIR=\"/app/STAR-$STAR_V\""
- "STAR_BINARY=STAR"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/parallel_map/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

Binary file not shown.

View File

@@ -0,0 +1,108 @@
// Executor settings; only the Kubernetes executor scope ($k8s) is configured here.
executor {
$k8s {
// NOTE(review): the Nextflow docs describe '10sec' as 10 submissions per second — confirm this is the intended rate.
submitRateLimit = '10sec'
// Interval passed to the executor's polling setting.
pollInterval = '1 sec'
}
}
// Default process directives plus label-based cpu/memory overrides.
process {
container = 'nextflow/bash:latest'
// default resources
// The default memory is a closure that scales with task.attempt; unlike the
// memory labels below it does not go through get_memory().
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// (137..140 is an inclusive range); every other exit status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Read by get_memory() (defined further down in this config) as the ceiling
// for the memory labels.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory labels: base amount multiplied by the attempt number, passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles: mount_temp, no_publish, docker and local.
profiles {
// detect tempdir
// Takes the first Groovy-truthy value of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR
// (in that order), falling back to '/tmp', and converts it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the `temp` setting of docker, podman and charliecloud at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: set publishDir.enabled = false for every process name (selector '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable docker and explicitly disable the other container engines.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller cpu/memory labels and a 25.GB process.maxMemory.
local {
// This config is for local processing.
process {
// Processes whose name matches ".*parallel_map_process" get maxForks = 1.
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Note: this profile defines no `verylowmem` override; only lowmem/midmem/highmem are set here.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Cap a requested memory amount at process.maxMemory.
//
// Called from the memory label closures, e.g. `get_memory( 8.GB * task.attempt )`.
//
// to_compare: the requested amount; it must support compareTo() against a
//             nextflow.util.MemoryUnit.
// Returns:
//   - to_compare unchanged when process.maxMemory is missing or falsy;
//   - process.maxMemory when task.attempt equals process.maxRetries;
//   - process.maxMemory (as a MemoryUnit) when the request exceeds it;
//   - to_compare otherwise.
// Any exception raised while evaluating the limits prints a diagnostic and
// exits with status 1.
def get_memory(to_compare) {
  // No ceiling configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, ask for the full ceiling.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request is larger than the ceiling (compareTo only guarantees the sign).
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Clamp to the same value that was just compared against. The name
      // `max_memory` is not defined anywhere in this config, so it must not be used here.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,235 @@
name: "create_report"
namespace: "report"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "author"
- "maintainer"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--eset"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_report"
info: null
example:
- "report.html"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "r_script"
path: "script.R"
is_executable: true
- type: "r_script"
path: "template.Rmd"
is_executable: true
- type: "r_script"
path: "plateLayouts.R"
is_executable: true
- type: "file"
path: "OutputSTARsolo.png"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Create a basic QC report in HTML format based on a number of esets.\n"
test_resources:
- type: "r_script"
path: "test.R"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "rocker/r2u:24.04"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "pandoc"
interactive: false
- type: "r"
cran:
- "ggplot2"
- "knitr"
- "gridExtra"
- "RColorBrewer"
- "processx"
- "whisker"
- "rmarkdown"
- "bookdown"
- "data.table"
- "platetools"
- "htmltools"
- "DT"
- "logger"
- "bit64"
bioc:
- "Biobase"
- "ComplexHeatmap"
script:
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
, getOption(\"repos\")))"
bioc_force_install: false
test_setup:
- type: "r"
packages:
- "testthat"
- "R.utils"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/report/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/report/create_report"
executable: "target/executable/report/create_report/create_report"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor tuning. Settings inside the `$k8s` scope apply to the Kubernetes executor only.
executor {
$k8s {
// Limit on how fast jobs are submitted.
submitRateLimit = '10sec'
// How often the executor polls for job status.
pollInterval = '1 sec'
}
}
// Defaults applied to every process, followed by label-based resource overrides.
process {
container = 'nextflow/bash:latest'
// default resources
// NOTE(review): this default memory request is not passed through get_memory(),
// unlike the memory labels below — confirm that is intended.
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// (exit statuses 137 through 140 are retried, anything else terminates the run).
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() when it resolves the memory labels below.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory requests scale with the attempt number and are passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles; each nested scope below is one selectable profile.
profiles {
// Temp directory shared by the profiles below: the first set (non-empty) value among
// NXF_TEMP, VIASH_TEMP, TEMPDIR and TMPDIR, falling back to '/tmp', made absolute.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the container engines' temp setting at the directory resolved above.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: disable publishDir for every process (the selector '.*' matches any name).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable Docker and explicitly switch the other container engines off.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller resource settings for running on a single machine.
local {
// This config is for local processing.
process {
// Run processes whose name matches '.*parallel_map_process' one at a time.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Ceiling read by get_memory() when it resolves the memory labels below.
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Memory requests scale with the attempt number and are passed through get_memory().
// NOTE(review): no 'verylowmem' override is defined in this profile.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
/**
 * Limit a memory request to the ceiling configured in `process.maxMemory`.
 *
 * @param to_compare the requested amount of memory (the label closures pass
 *                   e.g. `8.GB * task.attempt`)
 * @return `to_compare` when no ceiling is configured or the request does not
 *         exceed it; `process.maxMemory` when the request exceeds the ceiling
 *         or when the current attempt equals `process.maxRetries`
 *
 * Any exception raised while comparing the values is reported and the JVM is
 * terminated with exit code 1.
 */
def get_memory(to_compare) {
  // No ceiling configured: hand the request back unchanged.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // On the attempt whose number equals maxRetries, go straight to the ceiling.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request above the ceiling: clamp it. The original returned the undefined
    // name `max_memory` here, so every over-limit request fell into the catch
    // block below and aborted the run.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,430 @@
#' Displays the annotation of the wells in a plateLayout
#'
#' Normalises the well identifiers, completes the plate with its missing
#' wells, derives a colour per level of \code{valueVariable} and an optional
#' text label per well, and delegates the drawing to \code{plateLayout}.
#' The input table is copied first, so the caller's object is not modified.
#'
#' @param plateData a data.table (or data.frame) containing the information
#'  of the plate. This must contain a "WellID" and a "SampleName" column.
#' @param plateName The plate name
#' @param valueVariable The name of the variable in 'plateData' to
#'  be visualized in a plate layout. If NULL, every well is coloured grey.
#' @param textVariable The name(s) of the variable(s) in 'plateData' to be
#'  shown in the wells of the plate layout. If NULL, no text is shown.
#' @param breaks Numeric vector indicating breaks for plot coloring.
#'  Defaults to one break per colour.
#' @param colours A named character vector containing the colours
#'  for the different levels of the valueVariable. The names should
#'  correspond to the levels. If not specified, numeric-looking levels get a
#'  scheme of blues, the other levels a scheme of greens, and an extra
#'  "failed" entry is red.
#' @param colourWellText Colour to display the text in the wells.
#' @param layout Integer vector of length two with number of rows and
#'  columns in a plate, e.g. \code{c(16,24)}
#' @param legend.title A title for the legend
#' @param plot.title A title for the plot, will be concatenated
#'  with the plate name
#' @param textFontSize Font size of the text shown in the wells
#' @param ... additional arguments passed on to \code{plateLayout}
#' @return The value returned by \code{plateLayout}.
#' @import data.table
#' @importFrom platetools fill_plate
#' @export
plateLayout.annotation <- function(
  plateData,
  plateName = character(),
  valueVariable = "Dose",
  textVariable = NULL,
  breaks = NULL, colours = NULL,
  colourWellText = "black",
  layout = c(16, 24),
  legend.title = "Dose",
  plot.title = "Plate Annotation - ",
  textFontSize = 9, ...
) {
  WellID <- Label <- NULL
  if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
    stop(" 'WellID' and 'SampleName' column required in plateData object")
  }
  # Check WellID Format
  checkWellID <- grepl("^[[:upper:]]{1,2}[[:digit:]]{1,2}$", plateData$WellID)
  if (!all(checkWellID)) {
    stop("WellID does not have the correct format")
  }
  if (!is.null(valueVariable) && !(valueVariable[1] %in% colnames(plateData))) {
    stop("Column '", valueVariable[1], "' not found in plateData object")
  }

  # Work on a private copy: the `:=` updates below would otherwise modify the
  # caller's table by reference.
  plateData <- data.table::copy(data.table::as.data.table(plateData))

  # Zero-pad the column number ("A1" -> "A01").
  # NOTE(review): only the last letter of the row part is kept, so two-letter
  # rows accepted by the format check above collapse to one letter.
  plateData[, WellID := paste0(
    sub(".*([[:alpha:]]).+", "\\1", WellID),
    sprintf("%02d", as.numeric(sub(".*[[:alpha:]](.+)", "\\1", WellID)))
  )]

  # Coerce the completed plate back to a data.table so that the `:=` syntax
  # below keeps working whatever class fill_plate() returns.
  plateData <- data.table::as.data.table(
    platetools::fill_plate(plateData, "WellID", plate = layout[1] * layout[2])
  )
  plateData$column <- factor(
    sprintf(
      "%02d",
      as.numeric(sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID))
    ),
    levels = sprintf("%02d", seq(1, layout[2]))
  )
  plateData$row <- factor(
    sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
    levels = LETTERS[seq(1, layout[1])]
  )

  # The plotted variable is always the character column "values".
  valueVar <- "values"
  if (!is.null(valueVariable)) {
    data.table::set(
      plateData, j = "values",
      value = as.character(plateData[[valueVariable[1]]])
    )
  } else {
    data.table::set(plateData, j = "values", value = "grey")
    colours <- setNames("grey", "grey")
  }

  if (is.null(colours)) {
    blues <- colorRampPalette(c("#d6e0ff", "#2171B5"))
    greens <- colorRampPalette(c("light green", "dark green"))
    presentLevels <- unique(plateData[["values"]])
    # Levels that look like plain non-negative numbers are ordered numerically
    # and coloured in blues; all remaining levels are coloured in greens.
    looksNumeric <- grepl(
      "^[[:digit:]]+([.][[:digit:]]+)?$",
      trimws(presentLevels)
    )
    numLevels <- sort(as.numeric(as.character(presentLevels[looksNumeric])))
    otherLevels <- sort(as.character(presentLevels[!looksNumeric]))
    colours <- c(blues(length(numLevels)), greens(length(otherLevels)), "red")
    names(colours) <- c(numLevels, otherLevels, "failed")
  }

  if (!is.null(textVariable)) {
    plateData[,
      Label := do.call(paste, c(.SD, sep = "\n ")),
      .SDcols = textVariable
    ]
    # Allow the label to wrap after every dash and underscore.
    plateData[, Label := gsub("-", "-\n", Label)]
    plateData[, Label := gsub("_", "_\n", Label)]
    textVar <- "Label"
  } else {
    textVar <- NULL
  }

  if (is.null(breaks)) {
    breaks <- seq_along(colours)
  }

  plateLayout(
    plateData = plateData, valueVariable = valueVar,
    textVariable = textVar, plateName = plateName,
    breaks = breaks, colourWellText = colourWellText,
    legend.title = legend.title, layout = layout,
    colours = colours, plot.title = plot.title,
    textFontSize = textFontSize, ...
  )
}
#' Create a heatmap of values in a plateLayout view. The values can be
#' library sizes, number of genes, qcScore (0/1) or a factor.
#'
#' The input table is copied first, so the caller's object is not modified.
#'
#' @param plateData A data.table (or data.frame) of the values to be
#'  visualized with at least the column of interest (specified in
#'  'valueVariable'), a 'SampleName' column and a 'WellID' column indicating
#'  the wells in the plate. The WellID is a combination of a letter (row in
#'  the plate) and an integer (column in the plate). A 'WellType' column is
#'  required when \code{makeContourColours} is TRUE.
#' @param valueVariable The name of the variable in 'plateData'
#'  to be visualized in a plate layout
#' @param textVariable The name of the variable in 'plateData'
#'  to be shown in the wells of the plate layout. Defaults to the
#'  valueVariable and if NULL, no text will be displayed.
#' @param breaks Numeric vector indicating breaks for plot coloring. If NULL,
#'  breaks are derived from the plotted values.
#' @param colours Colours to be used for levels specified by
#'  the breaks. If NULL, a colour scheme of purples is used.
#' @param colourWellText Colour to display the text in the wells.
#' @param textFontSize Font size of the text shown in the wells.
#' @param layout Integer vector of length two with number of rows
#'  and columns in a plate, e.g. \code{c(16,24)}
#' @param makeContourColours Logical, whether or not the plate
#'  layout will contain contour colours for the wells based on the
#'  parameters in 'contourColours' and 'categories'
#' @param contourVariable The variable used for the contour colouring
#' @param contourColours Character vector specifying a colour for
#'  each range in 'categories'
#' @param lwdContours Numeric vector specifying the line width of the contour
#'  for each range in 'categories'
#' @param labelsCategories Character vector specifying the names
#'  (labels) for each range in 'categories'
#' @param categories if contourVariable is not a factor, a numeric
#'  vector specifying the categories to divide the contour variable,
#'  including the lower and upper limits.
#' @param plateName The plate name
#' @param plot.title A title for the plot, will be concatenated with
#'  the plate name
#' @param legend.title A title for the legend; defaults to 'valueVariable'
#' @param legendFontSize Font size of the legend labels
#' @param row_split,col_split Passed to \code{ComplexHeatmap::Heatmap} as
#'  \code{row_split} and \code{column_split}; by default all rows and all
#'  columns of 'layout' form a single slice.
#' @param legendFontSizeTitle Font size of the legend title
#' @param displayHeatmap Logical, whether to display the plateLayout heatmap
#' @param saveHeatmap Logical, whether to save the plateLayout heatmap
#' @param outputDir The directory where the plateLayout heatmap should be saved
#' @param prefix The prefix to the file name of the saved plateLayout heatmap
#' @return The \code{ComplexHeatmap::Heatmap} object.
#' @importFrom platetools fill_plate
#' @importFrom RColorBrewer brewer.pal
#' @importFrom ComplexHeatmap Heatmap
#' @importFrom circlize colorRamp2
#' @importFrom grid grid.text grid.rect gpar legendGrob gpar
#' @importFrom grDevices dev.off png
#' @importFrom graphics title
#' @export
plateLayout <- function(
  plateData, valueVariable, textVariable = valueVariable,
  breaks = NULL, colours = NULL, colourWellText = "white", textFontSize = 6,
  layout = c(16, 24), makeContourColours = FALSE, contourVariable = character(),
  contourColours = c("red", "orange", "seagreen3"), lwdContours = c(1, 1, 1),
  labelsCategories = c('1', '2', '3'), categories = NULL, plateName = character(),
  plot.title = character(), legend.title = NULL, legendFontSize = 15,
  row_split = rep("A", layout[1]), col_split = rep("A", layout[2]),
  legendFontSizeTitle = 15,
  displayHeatmap = TRUE, saveHeatmap = FALSE, outputDir = ".", prefix = ""
) {
  WellID <- NULL
  if (!(all(c("WellID", "SampleName") %in% colnames(plateData)))) {
    stop(" 'WellID' and 'SampleName' column required in plateData object")
  }

  # Work on a private copy: the `:=` update below would otherwise modify the
  # caller's table by reference.
  plateData <- data.table::copy(data.table::as.data.table(plateData))

  # Zero-pad the column number ("A1" -> "A01").
  plateData[, WellID := paste0(
    sub(".*([[:alpha:]]).+", "\\1", WellID),
    sprintf("%02d", as.numeric(sub(".*[[:alpha:]](.+)", "\\1", WellID)))
  )]

  # Complete the plate according to `layout` (the well count was hard-coded to
  # 384 before). Coerce back to a data.table so the data.table subsetting
  # below keeps working whatever class fill_plate() returns.
  plateData <- data.table::as.data.table(
    platetools::fill_plate(plateData, "WellID", plate = layout[1] * layout[2])
  )
  plateData$column <- factor(
    sprintf("%02d", as.numeric(
      sub(".*[[:alpha:]](.+)", "\\1", plateData$WellID)
    )),
    levels = sprintf("%02d", seq(1, layout[2]))
  )
  plateData$row <- factor(
    sub(".*([[:alpha:]]).+", "\\1", plateData$WellID),
    levels = LETTERS[seq(1, layout[1])]
  )

  # Reshape the plotted variable (and optionally the text) into plate matrices.
  plateValues <- plateLayoutFormat(
    plateData,
    varOfInterest = valueVariable,
    rows = layout[1],
    cols = layout[2]
  )
  if (!is.null(textVariable)) {
    plateText <- plateLayoutFormat(
      plateData, varOfInterest = textVariable,
      rows = layout[1],
      cols = layout[2]
    )
  }

  # Put a space before every capital and capitalise a leading lower-case
  # letter ("libSize - " becomes "Lib Size - ").
  plot.title <- gsub(
    "^([a-z])", "\\U\\1",
    gsub("([A-Z])", " \\1", plot.title, perl = TRUE),
    perl = TRUE
  )
  mainTitle <- paste0(plot.title, plateName)

  # Contour category per well; "" means "no contour".
  plateContourColours <- matrix("", nrow = layout[1], ncol = layout[2])
  if (makeContourColours) {
    if (!("WellType" %in% colnames(plateData))) {
      stop(" 'WellType' column required in plateData object for contour colours")
    }
    if (length(contourVariable) < 1 ||
        !(contourVariable[1] %in% colnames(plateData))) {
      stop("'contourVariable' must name a column of the plateData object")
    }
    contourData <- plateData[WellType %in% c("nonEmpty", "Treated Wells"), ]
    contourValues <- contourData[[contourVariable[1]]]
    if (is.numeric(contourValues)) {
      # NOTE(review): the original call also passed `left = TRUE`, which cut()
      # silently ignores; `include.lowest = TRUE` may have been intended.
      contourValues <- cut(
        contourValues,
        categories,
        right = TRUE,
        labels = labelsCategories
      )
    }
    names(contourColours) <- labelsCategories
    names(lwdContours) <- labelsCategories
    contourWells <- as.character(contourData$WellID)
    for (i in seq_len(layout[1])) {
      for (j in seq_len(layout[2])) {
        # Only wells matched by exactly one row receive a contour.
        sampleHit <- which(
          contourWells == paste0(LETTERS[i], sprintf("%02d", j))
        )
        if (length(sampleHit) == 1) {
          plateContourColours[i, j] <- as.character(contourValues[sampleHit])
        }
      }
    }
  }
  plateValues$contours <- plateContourColours
  colnames(plateValues$values) <- seq_len(ncol(plateValues$values))

  if (is.null(breaks)) {
    observed <- plateValues$values[!is.na(plateValues$values)]
    if (all(observed >= 0)) {
      # Hand over the values themselves: computeBreaks() takes the maximum
      # itself and needs the full vector to tell constant data apart.
      breaks <- computeBreaks(7, if (length(observed) > 0) observed else 0)
    } else {
      # Wells without a value are NA and must not reach quantile().
      breaks <- quantile(observed, probs = seq(0, 1, 0.125))
    }
  }
  if (is.null(colours)) {
    # colorRamp2 is called through its namespace so it is found even when
    # circlize is not attached; an unresolved name used to end up in the
    # fallback below without any notice.
    colours <- tryCatch(
      circlize::colorRamp2(
        breaks = breaks,
        colors = RColorBrewer::brewer.pal(length(breaks), "Purples")
      ),
      error = function(cond) {
        c("#9370DB", "white")
      }
    )
  }

  legendTitle <- if (is.null(legend.title)) {
    paste0(valueVariable, "\n")
  } else {
    paste0(legend.title, "\n")
  }

  ht <- Heatmap(
    plateValues$values,
    column_title = mainTitle, column_title_side = "top",
    rect_gp = gpar(lwd = 0.4),
    cluster_rows = FALSE, cluster_columns = FALSE,
    col = colours, row_title = NULL,
    row_split = row_split, column_split = col_split,
    row_names_side = "left",
    cluster_row_slices = FALSE,
    cluster_column_slices = FALSE,
    show_heatmap_legend = TRUE,
    heatmap_legend_param = list(
      title = legendTitle,
      grid_height = unit(9, "mm"), border = "black",
      labels_gp = gpar(fontsize = legendFontSize),
      title_gp = gpar(fontsize = legendFontSizeTitle)
    ),
    cell_fun = function(j, i, x, y, width, height, fill) {
      if (is.na(plateValues$values[i, j])) {
        # Wells without a value are covered with a white rectangle.
        grid.rect(
          x, y, width, height,
          gp = gpar(fill = "white", alpha = 0.7, lwd = 0.7, col = "white")
        )
      } else if (!is.null(textVariable)) {
        grid.text(
          plateText$values[i, j], x, y,
          just = "centre",
          gp = gpar(fontsize = textFontSize, col = colourWellText)
        )
      }
      if (makeContourColours) {
        contour <- as.character(plateValues$contours[i, j])
        # "" marks a well without contour; skip it instead of drawing a
        # rectangle with undefined colour and line width.
        if (!is.na(contour) && nzchar(contour)) {
          grid.rect(
            x, y, width, height,
            gp = gpar(
              col = contourColours[contour],
              fill = NA,
              lwd = lwdContours[contour]
            )
          )
        }
      }
    }
  )

  if (displayHeatmap) {
    print(ht)
  }
  if (saveHeatmap) {
    png(
      file.path(
        outputDir,
        paste0(prefix, gsub(" |-", "", plot.title), "_", plateName, ".png")
      ),
      width = 30, height = 10, units = "cm", res = 1200
    )
    # Close the device even when drawing fails.
    tryCatch(print(ht), finally = dev.off())
  }
  return(ht)
}
#' Return a matrix with the values of one variable arranged in the plate
#' layout
#'
#' Cell \code{[i, j]} holds the value of the single row of 'data' whose WellID
#' equals the i-th capital letter followed by the zero-padded column number
#' (e.g. "A01"). Wells matched by no row, or by more than one row, stay NA.
#'
#' @param data A data.frame or data.table of the values to be visualized with
#'  at least the column of interest (specified in 'varOfInterest') and a
#'  'WellID' column indicating the wells in the plate. The WellID is a
#'  combination of a letter (row in the plate) and a two-digit integer
#'  (column in the plate).
#' @param varOfInterest The name of the variable in 'data' to be visualized
#'  in a plate layout
#' @param rows number of rows in a plate layout
#' @param cols number of columns in a plate layout
#' @param verbose if \code{TRUE}, samples missing from the plate
#'  will be reported
#' @return A list with one element, \code{values}: a \code{rows} x \code{cols}
#'  matrix with the row letters as row names.
#' @export
plateLayoutFormat <- function(
  data, varOfInterest,
  rows = 16, cols = 24,
  verbose = FALSE
) {
  plateValues <- matrix(NA, nrow = rows, ncol = cols)
  row.names(plateValues) <- LETTERS[seq_len(rows)]

  # `[[` works for data.frame and data.table alike. The former `..var` lookup
  # only worked for data.table; for a data.frame the resulting error was
  # swallowed and the whole matrix silently stayed NA.
  wellValues <- data[[varOfInterest[1]]]
  if (is.null(wellValues)) {
    # Keep the historical outcome (an all-NA plate) but say why.
    warning("Column '", varOfInterest[1], "' not found in 'data'.")
    return(list("values" = plateValues))
  }
  wellIds <- as.character(data$WellID)

  for (i in seq_len(rows)) {
    for (j in seq_len(cols)) {
      well <- paste0(LETTERS[i], sprintf("%02d", j))
      sampleHit <- which(wellIds == well)
      if (length(sampleHit) == 1) {
        plateValues[i, j] <- wellValues[sampleHit]
      } else if (isTRUE(verbose) && length(sampleHit) == 0) {
        print(paste0(well, " is missing."))
      }
    }
  }
  return(list("values" = plateValues))
}
#' Helper function to automate break selection for raw count data
#'
#' This function creates an exponentially increasing vector of breaks between
#' zero and the maximum of the data. It is particularly useful for
#' raw counts or raw counts per million.
#'
#' The result starts at 0, continues at 1 and grows by a constant factor up to
#' the maximum of 'variable', giving 'nBreaks' values in total. When the data
#' hold at most one distinct value, or their maximum does not exceed 1 (no
#' increasing sequence from 1 exists then), the fixed breaks
#' \code{c(0, 0.5, max(1, maximum))} are returned instead.
#'
#' @param nBreaks Number of breaks to be generated (at least 3)
#' @param variable Numeric vector of data entries; a data.frame/data.table or
#'  matrix is flattened first. Missing values are ignored.
#' @return Numeric vector of strictly increasing breaks.
#' @export
computeBreaks <- function(nBreaks, variable) {
  # Flatten first: for a one-column table length(unique(.)) counts columns,
  # which used to send every table input down the degenerate branch.
  values <- as.vector(unlist(variable, use.names = FALSE))
  # NA must neither count as a distinct value nor take part in the maximum.
  values <- values[!is.na(values)]
  maxElement <- if (length(values) > 0) max(values) else 0

  if (length(unique(values)) <= 1 || maxElement <= 1) {
    # The leading 0 is no longer duplicated in this branch.
    return(c(0, 0.5, max(1, maxElement)))
  }
  if (nBreaks < 3) {
    stop("'nBreaks' must be at least 3")
  }

  # Closed form of the original 2x2 system: with b = log(max) / (nBreaks - 2)
  # the k-th non-zero break is exp((k - 1) * b), i.e. 1 for k = 1 and the
  # maximum for k = nBreaks - 1.
  growth <- log(maxElement) / (nBreaks - 2)
  breaks <- exp((seq_len(nBreaks - 1) - 1) * growth)
  return(c(0, breaks))
}

View File

@@ -0,0 +1,977 @@
---
title: "Exploratory Data Report"
date: "`r format(Sys.time(), '%d %B, %Y')`"
editor_options:
chunk_output_type: console
output:
oaStyle::html_report
# parameters which are overwritten by the script
params:
outputDir: 'output/'
esets:
- sample1.rds
- sample2.rds
---
<!---
Copy this template in your working directory (where you want to run the report).
This template can be used as a starting document to run a preliminary DRUGseq report
-->
<!---
Use full page width
-->
<style type="text/css">
div.main-container {
max-width: 1600px !important;
margin-left: auto;
margin-right: auto;
}
</style>
```{r params, eval = TRUE, include = FALSE}
# Pull the report parameters into plain variables for the chunks below.
esets <- params[["esets"]]
outputDir <- params[["outputDir"]]
```
```{r outputDir, echo = FALSE}
## Required: ABSOLUTE outputDir
outputDir <- file.path(outputDir)
# On Windows the first path component (e.g. "C:") is dropped, turning
# "C:/Users/..." into "/Users/.../".
if (.Platform$OS.type == "windows") {
  pathParts <- unlist(strsplit(outputDir, split = "/"))
  outputDir <- paste0("/", paste(pathParts[-1], collapse = "/"), "/")
}
```
```{r optionsChunkDoNotModify, echo = FALSE, message = FALSE, warning=FALSE}
## Chunk with options for knitr. This chunk should not be modified.
# Evaluate all chunks relative to the current working directory.
knitr::opts_knit$set(root.dir = getwd())
# Chunk defaults: run the code, hide code/messages/warnings, emit results as-is.
knitr::opts_chunk$set(
  eval = TRUE,
  error = FALSE,
  cache = FALSE,
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  results = "asis",
  comment = NA, #"#",
  collapse = TRUE,
  tidy = FALSE,
  fig.width = 20,
  fig.height = 10,
  out.width = "100%"
)
options(warn = 1, width = 200)
```
```{r libraries_and_functions}
source("plateLayouts.R")
library(ComplexHeatmap)
library(data.table)
library(ggplot2)
library(knitr)
library(Biobase)
library(gridExtra)
library(RColorBrewer)
```
```{r dataImport}
# Create esetList: read every eset file, failing early on a missing path.
esetList <- sapply(
  esets, simplify = FALSE,
  USE.NAMES = TRUE,
  function(eset_raw) {
    if (!file.exists(eset_raw)) {
      stop(paste0("Provided path '", eset_raw, "' is not a file."))
    }
    readRDS(eset_raw)
  }
)
# Each eset is named after its pool, so it must hold exactly one pool name.
pools <- sapply(esetList, function(eset) {
  unique(eset$PoolName)
})
if (any(lengths(pools) != 1)) {
  stop("Every eset must contain exactly one unique 'PoolName'.")
}
names(esetList) <- unlist(pools)

# Create qcData: the sample annotation of all pools in one table.
pDataList <- lapply(esetList, function(eset) data.table(pData(eset)))
qcData <- rbindlist(pDataList, fill = TRUE)
textVars <- "SampleName"
annotationVar <- "PoolName"
if (!"SampleName" %in% names(qcData)) {
  qcData[, SampleName := paste0(PoolName, "_", WellBC)]
}
qcData[, log10LibSize := round(log10(NumberOfInputReads))]
qcData[, (annotationVar) := lapply(.SD, as.factor), .SDcols = annotationVar]

# One colour per level of the annotation variable.
colourList <- list()
Design_levels <- sort(
  as.character(unique(qcData[, ..annotationVar][[1]])),
  decreasing = TRUE
)
if (length(Design_levels) == 1) {
  colours <- c("#d6e0ff", "lightgrey")
  names(colours) <- c(Design_levels, "Empty")
  colourList[[annotationVar]] <- list(
    "colours" = colours,
    "annotVar" = annotationVar,
    "text" = textVars
  )
} else if (length(Design_levels) == 2) {
  colours <- c("#d6e0ff", "#FF9999")
  names(colours) <- c(Design_levels)
  colourList[[annotationVar]] <- list(
    "colours" = colours,
    "annotVar" = annotationVar,
    "text" = textVars
  )
} else if (length(Design_levels) <= 20) {
  if (length(Design_levels) > 12) {
    # brewer.pal() never returns fewer than 3 colours, so take the full
    # 8-colour palette and keep exactly as many as are still needed.
    colours <- c(
      brewer.pal(12, "Set3"),
      brewer.pal(8, "Pastel2")[seq_len(length(Design_levels) - 12)]
    )
  } else {
    colours <- c(brewer.pal(length(Design_levels), "Set3"))
  }
  names(colours) <- c(Design_levels)
  colourList[[annotationVar]] <- list(
    "colours" = colours,
    "annotVar" = annotationVar,
    "text" = textVars
  )
} else {
  # More than 20 levels: a single colour for all non-empty wells.
  # This branch used the undefined name `annotVar`.
  colours <- c("#d6e0ff")
  names(colours) <- c("nonEmpty")
  colourList[[annotationVar]] <- list(
    "colours" = colours,
    "annotVar" = annotationVar,
    "text" = annotationVar
  )
}
```
# Pool Description
Per pool within this study, there are several pool layout plots shown, based on the
* number of STAR input reads (= library size)
* log10 transformed number of STAR input reads
* number of detected UMIs
* number of detected genes
* number of chromosomal reads
* percentage of ERCC
* percentage of mitochondria
> The values for the different samples within each pool are expected to be comparable if the content of the different pools is equally diverse.
```{r plateAnnotation, out.width = "100%",fig.width = 20, fig.height= 10}
plateVars <- c("NumberOfInputReads", "log10LibSize", "NumberOfMappedReads",
               "NumberOfChromReads", "NumberOfUMIs", "NumberOfGenes",
               "pctMT", "pctERCC")
# Breaks are computed once per variable over all pools, so the colour scale is
# shared between the pools. The column is handed over as a plain vector
# without NAs; a one-column data.table is treated as constant data by
# computeBreaks().
breaksVars <- lapply(
  plateVars,
  function(var) {
    values <- qcData[[var]]
    computeBreaks(7, values[!is.na(values)])
  }
)
names(breaksVars) <- plateVars
# One tabset per pool with one tab (plate layout) per variable.
for (pool in pools) {
  cat("\n\n")
  cat(paste0("## ", pool, " {.tabset} \n\n"))
  poolData <- qcData[PoolName == pool]
  for (plateVar in plateVars) {
    cat("\n\n")
    cat(sprintf("### %s {.unnumbered}", plateVar))
    cat("\n\n")
    # Title and legend name the variable shown; every tab used to be
    # labelled "libSize".
    plateLayout(
      poolData, valueVariable = plateVar,
      textFontSize = 10, legendFontSize = 12,
      plateName = pool, plot.title = paste0(plateVar, " - "),
      legend.title = plateVar, breaks = breaksVars[[plateVar]]
    )
    cat("\n\n")
  }
  cat("\n\n")
}
```
<br>
# Data Distributions
## Reads Distributions {.tabset}
The 4 box plots below represent the distributions per pool of the different samples based on:
* the number of STAR input reads
* the number of STAR mapped reads
* the percentage of STAR mapped reads
* the number of detected genes
> The distributions contribute to the QC metrics mentioned in Par 3. The higher these values, the better.
> The data range for the different plates is expected to be comparable if the content of the different plates is equally diverse.
### Number of Input Reads {.tabset .unnumbered}
```{r settings_1}
# Layout settings; figHeight is used as fig.height by the box plot chunks below.
nColPlots <- 1
figHeight <- 7
```
#### Distribution {.tabset .unnumbered}
```{r boxplots_input_plate, fig.height = figHeight}
# Box plot of the number of input reads, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = NumberOfInputReads, colour = PoolName)) +
  geom_boxplot() +
  labs(y = "Number of Input Reads", title = "Number of Input Reads") +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
### Number of Mapped Reads {.tabset .unnumbered}
#### Distribution {.unnumbered}
```{r boxplots_mapped_plate, fig.height = figHeight}
# Box plot of the number of mapped reads, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = NumberOfMappedReads, colour = PoolName)) +
  geom_boxplot() +
  labs(y = "Number of Mapped Reads", title = "Number of Mapped Reads") +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title.y = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
#### pct Mapped Reads {.unnumbered}
```{r boxplots_pctMapped_plate, fig.height = figHeight}
# Box plot of the percentage of mapped reads, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = PctMappedReads, colour = PoolName)) +
  geom_boxplot() +
  labs(y = "pct Mapped Reads", title = "pct Mapped Reads") +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title.y = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
### Number of Chromosomal Reads {.tabset .unnumbered}
#### Distribution {.unnumbered}
```{r boxplots_chrom_plate, fig.height = figHeight}
# Box plot of the number of chromosomal reads, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = NumberOfChromReads, colour = PoolName)) +
  geom_boxplot() +
  labs(
    y = "Number of Chromosomal Reads",
    title = "Number of Chromosomal Reads"
  ) +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title.y = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
#### pct Chromosomal Reads {.unnumbered}
```{r boxplots_pctChrom_plate, fig.height = figHeight}
# Box plot of the percentage of chromosomal reads, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = pctChrom, colour = PoolName)) +
  geom_boxplot() +
  labs(y = "pct Chromosomal Reads", title = "pct Chromosomal Reads") +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title.y = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
### Number of UMIs {.tabset .unnumbered}
#### Distribution {.tabset .unnumbered}
```{r boxplots_umi_plate, fig.height = figHeight}
# Box plot of the number of UMIs, one box per pool, coloured by pool.
ggplot(qcData, aes(x = PoolName, y = NumberOfUMIs, colour = PoolName)) +
  geom_boxplot() +
  labs(y = "Number of UMIs", title = "Number of UMIs") +
  theme(
    # The x axis carries no labels or ticks; pools are told apart by colour.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_text(angle = 90, size = 14),
    axis.title = element_text(size = 15),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 10)
  )
```
#### Density distribution {.unnumbered}
```{r density_numberOfUMIs}
## Pre-filtering data exploration
# Long format: one row per sample and read-count variable.
dt_plot <- melt(
  qcData,
  measure.vars = c("NumberOfInputReads", "NumberOfMappedReads", "NumberOfUMIs"),
  id.vars = c("SampleName", "PoolName", "WellID")
)
# Density of the three counts per pool, with dashed reference lines at
# 500k and 1.5M.
readsDensity_plot <- ggplot(dt_plot, aes(value)) +
  geom_density(aes(fill = variable), alpha = 0.8) +
  facet_grid(~ PoolName, scales = "free_x", space = "fixed", drop = TRUE) +
  geom_vline(
    xintercept = 5e5, linetype = "dashed",
    color = "steelblue3", size = 2
  ) +
  annotate(
    "text", x = 3.5e5, y = 2e-6, label = "500k",
    angle = 90, color = "steelblue3", size = 10
  ) +
  geom_vline(
    xintercept = 1.5e6, linetype = "dashed",
    color = "forestgreen", size = 2
  ) +
  annotate(
    "text", x = 1.35e6, y = 2e-6, label = "1.5M",
    angle = 90, color = "forestgreen", size = 10
  ) +
  labs(
    title = "Density plot",
    # Samples with a missing count are not counted as exceeding a threshold.
    subtitle = paste0(
      "# Samples with NumberOfMappedReads > 1.5M: ",
      sum(qcData$NumberOfMappedReads > 1.5e6, na.rm = TRUE),
      "\n# Samples with NumberOfUMIs > 500k: ",
      sum(qcData$NumberOfUMIs > 5e5, na.rm = TRUE)
    ),
    caption = paste0("# Total samples (after removing empty): ", nrow(qcData)),
    x = "Count",
    fill = "Variable"
  ) +
  theme(
    # The density scale itself is not shown.
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 90, size = 14),
    axis.title = element_text(size = 15),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 17),
    plot.caption = element_text(size = 15),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(2, "lines"),
    text = element_text(size = 5)
  )
readsDensity_plot
```
### Number of Genes {.tabset .unnumbered}
#### Distribution {.unnumbered}
```{r boxplots_genes_plate, fig.height = figHeight}
ggplot(
qcData,
aes(x = PoolName, y = NumberOfGenes, colour = PoolName)
) +
geom_boxplot() + ylab("Number of Genes") +
ggtitle("Number of Genes") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text.y = element_text(angle = 90, size = 14),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title.y = element_text(size = 15),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
```
## {.tabset .toc-ignore .unnumbered}
In addition, several plots are shown visualizing the efficiency of the reads-to-genes translation:
* the number of input reads vs the number of mapped reads
* the number of chromosomal reads vs the number of mapped reads
* the number of mapped reads per UMI vs the number of mapped reads
* the number of UMIs vs the number of mapped reads
* the number of mapped reads vs the number of genes
* the number of chromosomal reads vs the number of genes
* the number of mapped reads per UMI vs the number of genes
### Mapping Efficiency {.tabset .unnumbered}
#### Number of Input Reads {.unnumbered}
```{r mapping_efficiency_1_plate, fig.height = 7}
ggplot(
qcData,
aes(x = NumberOfInputReads, y = NumberOfMappedReads, colour = PoolName)
) +
geom_point() +
xlab("Number of Input Reads") +
ylab("Number of Mapped Reads") +
ggtitle("Number of Mapped Reads vs Number of Input Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
#### Number of Chromosomal Reads {.unnumbered}
```{r mapping_efficiency_2_plate, fig.height = 7}
ggplot(
qcData,
aes(x = NumberOfChromReads, y = NumberOfMappedReads, colour = PoolName)
) + geom_point() +
xlab("Number of Chromosomal Reads") + ylab("Number of Mapped Reads") +
ggtitle("Number of Chromosomal Reads vs Number of Mapped Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
#### Number of UMI {.unnumbered}
```{r mapping_efficiency_4_plate, fig.height = 7}
ggplot(
qcData,
aes(x =NumberOfUMIs, y = NumberOfMappedReads, colour = PoolName)
) + geom_point() +
ylab("Number of Mapped Reads") + xlab("Number of UMIs ") +
ggtitle("Number of UMIs vs Number of Mapped Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
### Counting Efficiency {.tabset .unnumbered}
#### Number of Mapped Reads {.unnumbered}
```{r gene_efficiency_1_plate, fig.height = 7}
ggplot(
qcData,
aes(x = NumberOfMappedReads, y = NumberOfGenes, colour = PoolName)
) + geom_point() +
ylab("Number of Genes") + xlab("Number of Mapped Reads") +
ggtitle("Number of Genes vs Number of Mapped Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
#### Number of Chromosomal Reads {.unnumbered}
```{r gene_efficiency_2_plate, fig.height = 7}
ggplot(
qcData,
aes(x = NumberOfChromReads, y = NumberOfGenes, colour = PoolName)
) + geom_point() +
ylab("Number of Genes") + xlab("Number of Chromosomal Reads") +
ggtitle("Number of Genes vs Number of Chromosomal Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
## Sequencing Saturation {.tabset}
The barplots below represent the sequencing saturation per sample as determined by STAR, split per pool.
The HT-RNAseq platform aims for shallow sequencing resulting in relatively low sequencing saturations of 10-20%.
In addition, the sequencing saturation vs the number of input reads is shown.
### Sequencing Saturation {.unnumbered}
```{r sequencingSaturation, fig.height = figHeight}
# Bar chart of the sequencing saturation of every well, filled by pool;
# bars sharing a WellID are placed side by side.
ggplot(qcData, aes(x = WellID, y = SequencingSaturation, fill = PoolName)) +
  geom_col(position = "dodge") +
  labs(title = "Sequencing Saturation per Sample", x = "Samples") +
  theme(
    text = element_text(size = 10),
    plot.title = element_text(size = 18),
    strip.text.x = element_text(size = 20),
    panel.spacing = unit(1, "lines"),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    axis.text.y = element_text(size = 15),
    # the x axis carries one tick per well, so its labels and ticks are hidden
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
```
### Sequencing Saturation - Input Reads {.unnumbered}
```{r sequencingSaturation_inputReads, fig.height = figHeight}
ggplot(
qcData,
aes(x = NumberOfInputReads, y = SequencingSaturation, colour = PoolName)
) + geom_point() +
ggtitle("Sequencing Saturation vs Number of Input Reads") +
theme(strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
### Sequencing Saturation - Chromosomal Reads {.unnumbered}
```{r sequencingSaturation_mappedReads, fig.height = figHeight}
ggplot(
qcData,
aes(x = NumberOfChromReads, y = SequencingSaturation, colour = PoolName)
) + geom_point() +
ggtitle("Sequencing Saturation vs Number of Chromosomal Reads") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size=10),
axis.text = element_text(angle = 90, size = 15),
plot.title = element_text(size=18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title = element_text(size = 15)
)
```
<br>
## Genomic Origin {.tabset}
The 3 boxplots below represent, per pool, the distributions of the percentage of reads mapping to:
* chromosomal regions
* mitochondrial regions
* ERCC spike-ins
The 4th plot summarises the above results across samples per pool.
The 5th plot shows the percentage of reads mapped to the transcriptome (as counted by STAR). This measurement serves as a proxy for the percentage of reads mapped to exons.
> The ERCC percentage contributes to the QC metrics mentioned in Par 3. This value is ideally as low as possible (but non-zero, to confirm that the spike-ins have been added) and comparable across the different pools.
### pctChrom {.tabset .unnumbered}
```{r genomicOrigin_chrom_plate, fig.height = figHeight}
ggplot(
qcData, aes(x = PoolName, y = pctChrom, colour = PoolName)
) +
geom_boxplot() +
ggtitle("pctChrom") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text.y = element_text(angle = 90, size = 14),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title.y = element_text(size = 15),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
```
### pctMT {.tabset .unnumbered}
```{r genomicOrigin_mt_plate, fig.height = figHeight}
ggplot(
qcData,
aes(x = PoolName, y = pctMT, colour = PoolName)
) +
geom_boxplot() + ggtitle("pctMT") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text.y = element_text(angle = 90, size = 14),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title.y = element_text(size = 15),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
```
### pctERCC {.tabset .unnumbered}
```{r genomicOrigin_ercc_plate, fig.height = figHeight}
ggplot(qcData, aes(x = PoolName, y = pctERCC, colour = PoolName)) +
geom_boxplot() +
ggtitle("pctERCC") +
theme(
strip.text.x = element_text(size = 20),
panel.spacing = unit(2, "lines"),
text = element_text(size = 10),
axis.text.y = element_text(angle = 90, size = 14),
plot.title = element_text(size = 18),
legend.text = element_text(size = 15),
legend.title = element_text(size = 17),
axis.title.y = element_text(size = 15),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
```
### Genomic Summary {.tabset .unnumbered}
```{r genomicOrigin_summary_plate}
# Per-pool summary of the genomic origin percentages.
# NOTE: despite the "mean" prefix in the object names, the values are medians.
meanPctChromMTData <- qcData[
  ,
  list(
    pctChrom = median(pctChrom),
    pctMT = median(pctMT),
    pctERCC = median(pctERCC)
  ),
  by = PoolName
]

# Long format: one row per (pool, origin) pair, as needed for a stacked bar chart.
meanPctChromMTDataLong <- melt(
  meanPctChromMTData,
  id.vars = "PoolName",
  measure.vars = c("pctChrom", "pctMT", "pctERCC"),
  variable.name = "Origin",
  value.name = "pct"
)

ggplot(meanPctChromMTDataLong, aes(x = PoolName, y = pct, fill = Origin)) +
  geom_col(position = "stack") +
  labs(title = "Genomic Origin") +
  theme(
    text = element_text(size = 10),
    plot.title = element_text(size = 18),
    legend.title = element_text(size = 17),
    legend.text = element_text(size = 15),
    axis.title = element_text(size = 15),
    axis.text = element_text(angle = 90, size = 15)
  )
```
# Depletion {.tabset}
<div align="center">
```{r depletion}
# For every pool: emit a sub-heading, plot the genes with the highest average
# count across the pool's samples, and list those genes in an interactive table.
# At most `top_n_genes` genes are shown; head() is used instead of indexing with
# 1:100 so that a pool with fewer genes does not yield all-NA rows in the plot
# and in the table.
top_n_genes <- 100
for (eset_name in pools) {
  cat("\n\n")
  cat(paste0("## ", eset_name, " {.unnumbered}"))
  cat("\n\n")

  eset <- esetList[[eset_name]]

  # Average count per gene (rows of the expression matrix), highest first.
  average_reads <- sort(rowMeans(exprs(eset)), decreasing = TRUE)
  plotData <- data.table(
    ENSGID = names(average_reads),
    av_count = average_reads
  )

  # Gene annotation, re-ordered to follow the ranking in plotData.
  gen_descript <- data.table(
    ENSGID = eset@featureData@data$gene_id,
    Description = eset@featureData@data$GENENAME
  )
  order_gen_descript <- gen_descript[
    match(plotData$ENSGID, gen_descript$ENSGID),
  ]

  g <- ggplot(
    head(plotData, top_n_genes),
    aes(x = reorder(ENSGID, -av_count), y = av_count)
  ) +
    geom_bar(stat = "identity") +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 12),
      axis.text.y = element_text(size = 12),
      legend.text = element_text(size = 15),
      legend.title = element_text(size = 15),
      axis.title = element_text(size = 18),
      plot.title = element_text(size = 20)
    ) +
    ylab("Average Counts") +
    xlab("Genes")
  print(g)

  cat("\n\n")
  cat("<br>")
  cat("<br>")
  print(htmltools::tagList((DT::datatable(head(order_gen_descript, top_n_genes)))))
}
```
</div>
<br>
<br>
<br>
<br>
# Glossary {.unnumbered}
## Read {.unlisted .unnumbered}
A read is an oligonucleotide (a short RNA fragment) that has been sequenced. It consists of a fixed number of base pairs (bp) and therefore has a specific read length.
## Input Read {.unlisted .unnumbered}
Each read of the fastq file used as input to the STAR aligner is considered an input read.
## Read With Valid Barcode {.unlisted .unnumbered}
A read with a valid barcode is a read for which the barcode matches the white list of barcodes under the given restriction of the number of allowed mismatches. The number of reads with a valid barcode is lower or equal to the number of input reads.
## Mapped Read {.unlisted .unnumbered}
A read that has been aligned against the reference genome and for which one or more suitable matching locations have been found is a mapped read. Depending on the number of allowed mismatches this might or might not be an exact match. The number of mapped reads is lower or equal to the number of reads with a valid barcode.
## Uniquely Mapped Read {.unlisted .unnumbered}
A read for which one and only one suitable matching location in the reference genome was found is a uniquely mapped read. The number of uniquely mapped reads is lower or equal to the number of mapped reads.
## Counted Read {.unlisted .unnumbered}
A mapped read will only be counted if it overlaps (1 nucleotide or more) with one and only one gene. The number of counted reads is lower or equal to the number of (uniquely) mapped reads.
## UMIs {.unlisted .unnumbered}
Unique molecular identifiers (UMIs) are short sequences used to uniquely tag each molecule in a sample library. Sequencing with UMIs allows bioinformatics software to filter out duplicate reads and PCR errors with a high level of accuracy and report unique reads.
The reported UMI count is the number of UMIs among the set of reads that map to a unique gene, i.e. the number of reads after deduplication.
## pctERCC {.unlisted .unnumbered}
The percentage of reads mapping to the ERCC genes among the total number of **mapped** reads.
## pctMT {.unlisted .unnumbered}
The percentage of reads mapping to the MT genes among the total number of **mapped** reads.
## Sequencing Saturation {.unlisted .unnumbered}
The sequencing saturation is a measure of the fraction of the library complexity that has been sequenced. The inverse of one minus the sequencing saturation can be interpreted as the number of additional reads it would take to detect a new transcript. Consequently, a low sequencing saturation indicates shallow sequencing, in which a new transcript could be discovered with only a few additional reads.
<br>
<br>
<br>
<br>
<center>
![](OutputSTARsolo.png)
</center>
<br>
<br>

View File

@@ -0,0 +1,232 @@
name: "combine_star_logs"
namespace: "stats"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Arguments"
arguments:
- type: "string"
name: "--barcodes"
description: "Barcodes corresponding to the respective log files.\n"
info: null
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--star_logs"
description: "Paths to the STAR log files (most frequently called Log.final.out)\n"
info: null
example:
- "Log.final.out"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--gene_summary_logs"
description: "Paths to the Summary.csv files from the STAR Solo output. Can be\
\ found in\nthe 'Solo.out/Gene' folder relative to the root of the STAR output\
\ directory. \n"
info: null
example:
- "Summary.csv"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--reads_per_gene_logs"
description: "Paths to the 'ReadsPerGene.out.tab' files as output by STAR.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Tab-delimited file describing for each barcode (as the rows), the\
\ metrics (as columns)\ngathered from the different input files. \n"
info: null
default:
- "starLogs.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/combine_star_logs/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/combine_star_logs"
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
executor {
$k8s {
submitRateLimit = '10sec'
pollInterval = '1 sec'
}
}
process {
container = 'nextflow/bash:latest'
// default resources
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
profiles {
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
/**
 * Cap a requested memory amount at `process.maxMemory`.
 *
 * - When `process.maxMemory` is not set, the request is returned unchanged.
 * - On the attempt whose number equals `process.maxRetries`, `process.maxMemory`
 *   itself is returned.
 * - Otherwise the request is returned, unless it exceeds `process.maxMemory`,
 *   in which case `process.maxMemory` is returned.
 *
 * Any exception raised while evaluating the above prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * NOTE(review): `task` is not a parameter of this function; confirm that it
 * resolves in the context from which the memory closures call it.
 *
 * @param to_compare the requested amount of memory (compared against a
 *                   nextflow.util.MemoryUnit)
 * @return the amount of memory to use
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // compareTo only guarantees the sign of its result, so test for "> 0" rather than "== 1"
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // previously returned the undefined name `max_memory`, which made every
      // over-limit request end up in the catch block below instead of being capped
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,216 @@
name: "generate_pool_statistics"
namespace: "stats"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Paths to the input files, each containing a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "processedBamFile_well1.tsv"
- "processedBamfile_well2.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChromPool"
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
\ ERCC and mitochondrial\nreads.\n"
info: null
example:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/generate_pool_statistics"
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
executor {
$k8s {
submitRateLimit = '10sec'
pollInterval = '1 sec'
}
}
process {
container = 'nextflow/bash:latest'
// default resources
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
profiles {
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
/**
 * Cap a requested memory amount at `process.maxMemory`.
 *
 * - When `process.maxMemory` is not set, the request is returned unchanged.
 * - On the attempt whose number equals `process.maxRetries`, `process.maxMemory`
 *   itself is returned.
 * - Otherwise the request is returned, unless it exceeds `process.maxMemory`,
 *   in which case `process.maxMemory` is returned.
 *
 * Any exception raised while evaluating the above prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * NOTE(review): `task` is not a parameter of this function; confirm that it
 * resolves in the context from which the memory closures call it.
 *
 * @param to_compare the requested amount of memory (compared against a
 *                   nextflow.util.MemoryUnit)
 * @return the amount of memory to use
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // compareTo only guarantees the sign of its result, so test for "> 0" rather than "== 1"
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // previously returned the undefined name `max_memory`, which made every
      // over-limit request end up in the catch block below instead of being capped
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,296 @@
name: "generate_well_statistics"
namespace: "stats"
version: "v0.3.0"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--input"
description: "The .bam file as returned by the mapping tool STAR."
info: null
example:
- "input.bam"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode"
description: "The barcode for the well that is being processed. Is only used to\
\ add a metadata\ncolumn to all output files.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--well_id"
description: "ID of this well. Only used to add a metadata column to the output\
\ files.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--processedBAMFile"
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
\ to which the read was mapped.\n"
info: null
default:
- "processedBamFile.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrUMIsPerCB"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per barcode the number of UMIs (nrUMIs) and the total number of reads (NumberOfReads).\n"
info: null
default:
- "nrReadsNrUMIsPerCB.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--umiFreqTop"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per UMI (column UB) the frequency at which they occur in the reads (column\n\
N). Only the top 100 UMIs are included.\n"
info: null
default:
- "umiFreqTop100.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--threads"
description: "Number of threads to use for decompressing BAM files.\n"
info: null
default:
- 1
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Generate summary statistics from BAM files generated by STAR solo."
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test.sam"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.0"
namespace_separator: "/"
setup:
- type: "docker"
env:
- "PIP_BREAK_SYSTEM_PACKAGES=1"
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
- "HTSLIB_INCLUDE_DIR=/usr/include/"
- type: "apt"
packages:
- "python3"
- "python3-pip"
- "python3-venv"
- "python-is-python3"
- "libhts-dev"
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pysam"
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded for this component.
build_info:
config: "src/stats/generate_well_statistics/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/generate_well_statistics"
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
viash_version: "0.9.0"
git_commit: "bc7a95f98ff78b2c49be97128d8b31e8d29daa6d"
# SECURITY: this URL previously embedded a GitHub access token in its userinfo
# part (x-access-token:ghs_...). Credentials must never be committed: revoke that
# token and make the build strip userinfo from the remote URL before recording it.
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "v0.3.0"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.3.0'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,108 @@
// Executor settings. The nested `$k8s` block holds the values set for that
// executor key only.
executor {
$k8s {
// Job submission rate limit and polling interval; see the Nextflow
// executor-scope documentation for the accepted value formats.
submitRateLimit = '10sec'
pollInterval = '1 sec'
}
}
// Default process settings plus label-based resource presets.
process {
container = 'nextflow/bash:latest'
// default resources
// Default memory is multiplied by task.attempt; unlike the labelled presets
// below it is not passed through get_memory().
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 select 'retry'; anything else selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Ceiling read by get_memory() through process.maxMemory.
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
// Memory presets: the base amount is multiplied by task.attempt and then
// passed through get_memory().
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
}
// Named configuration profiles defined by this config file.
profiles {
// detect tempdir
// The first environment variable in this chain that is set and non-empty wins;
// '/tmp' is the fallback. The result is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// mount_temp: point the `temp` setting of docker, podman and charliecloud
// at the temp directory detected above.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// no_publish: set publishDir.enabled = false for every process whose name
// matches '.*'.
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// docker: enable Docker (with fixOwnership) and explicitly disable the other
// container engines listed below.
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// local: smaller resource settings than the non-profile defaults.
local {
// This config is for local processing.
process {
// Processes whose name matches ".*parallel_map_process" get maxForks = 1.
withName: ".*parallel_map_process" {
maxForks = 1
}
// Ceiling read by get_memory() through process.maxMemory.
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
// Memory requests are multiplied by task.attempt and passed through get_memory().
// Note: this profile defines no `verylowmem` override.
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
/**
 * Cap a requested memory amount at `process.maxMemory`.
 *
 * Behaviour:
 *  - if `process.maxMemory` is missing or falsy, the request is returned unchanged;
 *  - if `process.maxRetries` is set and `task.attempt` equals it, `process.maxMemory`
 *    is returned regardless of the request;
 *  - if the request is larger than `process.maxMemory`, `process.maxMemory` is returned;
 *  - otherwise the request is returned unchanged.
 *
 * Any exception raised while evaluating the limits prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * @param to_compare the requested memory; it is compared against
 *                   `process.maxMemory` cast to nextflow.util.MemoryUnit
 * @return the request, or `process.maxMemory` when the cap applies
 */
def get_memory(to_compare) {
    // No ceiling configured: hand the request back untouched.
    if (!process.containsKey("maxMemory") || !process.maxMemory) {
        return to_compare
    }

    try {
        // NOTE(review): `task` is not a parameter of this function; confirm it
        // resolves in this scope when called from the withLabel memory closures.
        if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
            return process.maxMemory
        }
        // Any positive compareTo result means "greater than", so test `> 0`
        // rather than relying on the value being exactly 1.
        else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
            // Fixed: the original returned the undefined name `max_memory`, which
            // threw and sent every over-limit request into the catch/exit path.
            return process.maxMemory as nextflow.util.MemoryUnit
        }
        else {
            return to_compare
        }
    } catch (all) {
        println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
        System.exit(1)
    }
}