Build branch main with version main (1e1ffb3)

Build pipeline: vsh-ci-dev-jsbwk

Source commit: 1e1ffb315f

Source message: Merge pull request #17 from viash-hub/add_biobox_modules

- Migrate a number of components to biobox
- Fix tests
- Reduce size of test resources
- Prepare for Viash Hub
This commit is contained in:
CI
2024-09-13 07:41:13 +00:00
commit 1ebb61f1e8
557 changed files with 430700 additions and 0 deletions

View File

@@ -0,0 +1,249 @@
name: "bbmap_bbsplit"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "Sample ID"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "Input fastq files, either one or two (paired)"
info: null
example:
- "sample.fastq"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--primary_ref"
description: "Primary reference FASTA"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_fasta_list"
description: "Path to comma-separated file containing a list of reference genomes\
\ to filter reads against with BBSplit."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--only_build_index"
description: "true = only build index; false = mapping"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--built_bbsplit_index"
description: "Directory with index files"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--fastq_1"
description: "Output file for read 1."
info: null
default:
- "$id.$key.read_1.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Output file for read 2."
info: null
default:
- "$id.$key.read_2.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_index"
description: "Directory with index files"
info: null
default:
- "BBSplit_index"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Split sequencing reads by mapping them to multiple references simultaneously.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genome.fasta"
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
- type: "file"
path: "sarscov2.fa"
- type: "file"
path: "human.fa"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/bbmap/bbsplit/main.nf"
- "modules/nf-core/bbmap/bbsplit/meta.yml"
last_sha: "277bd337739a8b8f753fa7b5eda6743b9b6acb89"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
# Docker build step: installs a JDK plus download tools, fetches the BBMap
# 39.01 release tarball and copies its contents into /usr/local/bin.
# TLS verification is left enabled (no `--no-check-certificate`) because the
# downloaded archive ends up on PATH inside the image; `ca-certificates` is
# installed explicitly so that wget can validate the server certificate.
# NOTE(review): this file is generated from src/bbmap_bbsplit/config.vsh.yaml;
# apply the same change there so it survives the next build.
setup:
- type: "docker"
  run:
  - |
    apt-get update && \
    apt-get install -y build-essential ca-certificates openjdk-17-jdk wget tar && \
    wget https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \
    tar xzf BBMap_39.01.tar.gz && \
    cp -r bbmap/* /usr/local/bin
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bbmap_bbsplit/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bbmap_bbsplit"
executable: "target/executable/bbmap_bbsplit/bbmap_bbsplit"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,189 @@
name: "bedtools_genomecov"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity."
info: null
required: false
choices:
- "unstranded"
- "forward"
- "reverse"
- "auto"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam"
description: "Genome BAM file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_bedtools_args"
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--bedgraph_forward"
info: null
default:
- "$id.forward.bedgraph"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bedgraph_reverse"
info: null
default:
- "$id.reverse.bedgraph"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Compute BEDGRAPH (-bg) summaries of feature coverage"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "chr19.bam"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/bedtools_genomecov.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
# Docker build step: downloads the static bedtools v2.31.0 binary from the
# GitHub release page, installs it as /usr/local/bin/bedtools and marks it
# executable.
# TLS verification is left enabled (no `--no-check-certificate`) because the
# downloaded file is executed inside the image; `ca-certificates` is installed
# explicitly so that wget can validate the server certificate.
# NOTE(review): this file is generated from src/bedtools_genomecov/config.vsh.yaml;
# apply the same change there so it survives the next build.
setup:
- type: "docker"
  run:
  - |
    apt-get update && \
    apt-get install -y build-essential ca-certificates wget && \
    wget https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \
    mv bedtools.static /usr/local/bin/bedtools && \
    chmod a+x /usr/local/bin/bedtools
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools_genomecov/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools_genomecov"
executable: "target/executable/bedtools_genomecov/bedtools_genomecov"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,193 @@
name: "cat_additional_fasta"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--fasta"
description: "Path to FASTA genome file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "Path to GTF annotation file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--additional_fasta"
description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\
\ sequences."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--biotype"
description: "Biotype value to use when appending entries to GTF file when additional\
\ fasta file is provided."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--fasta_output"
description: "Concatenated FASTA file."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf_output"
description: "Concatenated GTF file."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
description: "Concatenate additional fasta file to reference FASTA and GTF files.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genome.fasta"
- type: "file"
path: "genes.gtf.gz"
- type: "file"
path: "gfp.fa.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/cat_additional_fasta.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
# Docker engine for cat_additional_fasta: the component is built on top of the
# base image below and published to the target registry under the target tag.
- type: "docker"
id: "docker"
# NOTE(review): the base image carries no tag, so Docker resolves it to
# `python:latest` at build time. Consider pinning an explicit version so that
# rebuilt images stay reproducible.
image: "python"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cat_additional_fasta/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/cat_additional_fasta"
executable: "target/executable/cat_additional_fasta/cat_additional_fasta"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,180 @@
name: "cat_fastq"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--read_1"
description: "Read 1 fastq files to be concatenated"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--read_2"
description: "Read 2 fastq files to be concatenated"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--fastq_1"
description: "Concatenated read 1 fastq"
info: null
default:
- "$id.read_1.merged.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Concatenated read 2 fastq"
info: null
default:
- "$id.read_2.merged.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Concatenate multiple fastq files"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357071_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
- type: "file"
path: "SRR6357071_2.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/cat/fastq/main.nf"
- "modules/nf-core/cat/fastq/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cat_fastq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/cat_fastq"
executable: "target/executable/cat_fastq/cat_fastq"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,248 @@
name: "deseq2_qc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--counts"
description: "Count file matrix where rows are genes and columns are samples"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--pca_header_multiqc"
info: null
default:
- "assets/multiqc/deseq2_pca_header.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--clustering_header_multiqc"
info: null
default:
- "assets/multiqc/deseq2_clustering_header.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--deseq2_vst"
description: "Use vst transformation instead of rlog with DESeq2"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_args"
info: null
default:
- "--id_col 1 --sample_suffix '' --outprefix deseq2 --count_col 3"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_args2"
info: null
default:
- "star_salmon"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--deseq2_output"
info: null
default:
- "deseq2"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--pca_multiqc"
info: null
default:
- "deseq2.pca.vals_mqc.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--dists_multiqc"
info: null
default:
- "deseq2.sample.dists_mqc.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "deseq2_qc.r"
description: "Run DESeq2, perform PCA, generate heatmaps and scatterplots for samples\
\ in the counts files\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "counts.tsv"
- type: "file"
path: "deseq2_pca_header.txt"
- type: "file"
path: "deseq2_clustering_header.txt"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/deseq2_qc.nf"
last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "r-base"
- "libcurl4-openssl-dev"
- "libssl-dev"
- "libxml2-dev"
interactive: false
- type: "r"
cran:
- "optparse"
- "ggplot2"
- "RColorBrewer"
- "pheatmap"
bioc:
- "DESeq2"
url:
- "https://cran.r-project.org/src/contrib/Archive/matrixStats/matrixStats_1.1.0.tar.gz"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/deseq2_qc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/deseq2_qc"
executable: "target/executable/deseq2_qc/deseq2_qc"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,246 @@
#!/usr/bin/env Rscript
################################################
################################################
## REQUIREMENTS ##
################################################
################################################
## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE
## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS.
## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R
################################################
################################################
## LOAD LIBRARIES ##
################################################
################################################
library(optparse)
library(DESeq2)
library(ggplot2)
library(RColorBrewer)
library(pheatmap)
################################################
################################################
## PARSE COMMAND-LINE PARAMETERS ##
################################################
################################################
# Command-line interface of the script. Every option has a default except the
# counts file, which is mandatory and checked right after parsing.
option_list <- list(
    make_option(
        c("-i", "--count_file"),
        type    = "character",
        metavar = "path",
        default = NULL,
        help    = "Count file matrix where rows are genes and columns are samples."
    ),
    make_option(
        c("-f", "--count_col"),
        type    = "integer",
        metavar = "integer",
        default = 3,
        help    = "First column containing sample count data."
    ),
    make_option(
        c("-d", "--id_col"),
        type    = "integer",
        metavar = "integer",
        default = 1,
        help    = "Column containing identifiers to be used."
    ),
    make_option(
        c("-r", "--sample_suffix"),
        type    = "character",
        metavar = "string",
        default = '',
        help    = "Suffix to remove after sample name in columns e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'."
    ),
    make_option(
        c("-p", "--outprefix"),
        type    = "character",
        metavar = "string",
        default = 'deseq2',
        help    = "Output prefix."
    ),
    make_option(
        c("-v", "--vst"),
        type    = "logical",
        metavar = "boolean",
        default = FALSE,
        help    = "Run vst transform instead of rlog."
    ),
    make_option(
        c("-c", "--cores"),
        type    = "integer",
        metavar = "integer",
        default = 1,
        help    = "Number of cores."
    ),
    make_option(
        c("-o", "--outdir"),
        type    = "character",
        metavar = "path",
        default = "./",
        help    = "Output directory."
    )
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# Without a counts file there is nothing to analyse: show the usage text and abort.
if (is.null(opt$count_file)) {
    print_help(opt_parser)
    stop("Please provide a counts file.", call. = FALSE)
}
################################################
################################################
## READ IN COUNTS FILE ##
################################################
################################################
# Load the tab-delimited counts matrix, keeping every column as data for now.
count.table <- read.delim(opt$count_file, header = TRUE, row.names = NULL)

# Use the identifier column as row names, then keep only the sample columns
# (everything from --count_col to the last column).
rownames(count.table) <- count.table[, opt$id_col]
count.table <- count.table[, seq(opt$count_col, ncol(count.table)), drop = FALSE]

# Clean up the sample names: remove the user-supplied suffix, then a trailing dot.
# NOTE(review): gsub() interprets --sample_suffix as a regular expression and
# replaces every match, not only one at the end of the name.
colnames(count.table) <- gsub('\\.$', '', gsub(opt$sample_suffix, "", colnames(count.table)))
################################################
################################################
## RUN DESEQ2 ##
################################################
################################################
# Make sure the output directory exists (creating parent directories as
# needed), then switch into it so later relative paths resolve against it.
if (!file.exists(opt$outdir)) {
    dir.create(opt$outdir, recursive = TRUE)
}
setwd(opt$outdir)
# Build the per-sample metadata table (coldata) and, where the sample names
# allow it, derive grouping columns from the "_"-separated parts of each name.
samples.vec <- colnames(count.table)
# Split every sample name on "_"; the number of parts in the first name is the reference.
name_components <- strsplit(samples.vec, "_")
n_components <- length(name_components[[1]])
# Only decompose when names have more than one part and all names have the same number of parts.
decompose <- n_components!=1 && all(sapply(name_components, length)==n_components)
# row.names=1 turns the first column (the sample names) into row names; the names are also kept in a 'sample' column.
coldata <- data.frame(samples.vec, sample=samples.vec, row.names=1)
if (decompose) {
# One column per name position: column i holds the i-th part of every sample name.
groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i)))
n_distinct <- sapply(groupings, function(grp) length(unique(grp)))
# Keep only positions that vary between samples but are not unique to each sample.
groupings <- groupings[n_distinct!=1 & n_distinct!=length(samples.vec)]
if (ncol(groupings)!=0) {
# Name the surviving columns Group1..GroupK and attach them to the sample metadata.
names(groupings) <- paste0("Group", 1:ncol(groupings))
coldata <- cbind(coldata, groupings)
} else {
# No usable grouping column is left, so the decomposition is switched off.
decompose <- FALSE
}
}
# Build the DESeq2 dataset, apply the chosen transformation (rlog or vst) and
# save the resulting object both as <outprefix>.dds.RData and <outprefix>.rds.
DDSFile <- paste(opt$outprefix,".dds.RData",sep="")
counts <- count.table[,samples.vec,drop=FALSE]
# Counts are rounded before being passed on; the design formula is intercept-only (~ 1).
dds <- DESeqDataSetFromMatrix(countData=round(counts), colData=coldata, design=~ 1)
dds <- estimateSizeFactors(dds)
if (min(dim(count.table))<=1) { # No point if only one sample, or one gene
# Save the untransformed object, emit a warning and end the script with exit status 0.
save(dds,file=DDSFile)
saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile))
warning("Not enough samples or genes in counts file for PCA.", call.=FALSE)
quit(save = "no", status = 0, runLast = FALSE)
}
# --vst selects varianceStabilizingTransformation(); otherwise rlog() is used.
# vst_name records which one ran and doubles as the name of the stored assay.
if (!opt$vst) {
vst_name <- "rlog"
rld <- rlog(dds)
} else {
vst_name <- "vst"
rld <- varianceStabilizingTransformation(dds)
}
# Store the transformed values as an additional assay on dds, named after the transformation.
assay(dds, vst_name) <- assay(rld)
save(dds,file=DDSFile)
saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile))
################################################
################################################
## PLOT QC ##
################################################
################################################
##' PCA pre-processor
##'
##' Generate all the necessary information to plot PCA from a DESeq2 object
##' in which an assay containing a variance-stabilised matrix of counts is
##' stored. Copied from DESeq2::plotPCA, but with additional ability to
##' say which assay to run the PCA on.
##'
##' @param object The DESeqDataSet object.
##' @param ntop number of top genes to use for principal components, selected by highest row variance.
##' @param assay the name or index of the assay that stores the variance-stabilised data.
##' Defaults to the last assay of the object.
##' @return A data.frame containing the columns of colData(object) alongside the projected
##' data (PC1, PC2, ...), one row per sample. Rows are ordered so that samples whose PC1
##' rank (ties broken by PC2 rank) lies furthest from the median rank come first.
##' A 'percentVar' attribute is set: a data.frame with columns 'PC' and 'percentVar'
##' giving the percentage of variance each PC explains.
##' @author Gavin Kelly
plotPCA_vst <- function (object, ntop = 500, assay=length(assays(object))) {
    # Fetch the transformed matrix once and reuse it for variances and PCA.
    mat <- assay(object, assay)
    rv <- rowVars(mat)
    # Indices of the ntop most variable genes (all genes when ntop exceeds the gene count).
    select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
    # drop=FALSE keeps a matrix even if a single gene is selected.
    pca <- prcomp(t(mat[select, , drop = FALSE]), center = TRUE, scale. = FALSE)
    percentVar <- pca$sdev^2/sum(pca$sdev^2)
    df <- cbind(as.data.frame(colData(object)), pca$x)
    # Order points so extreme samples are more likely to get a label.
    # geom_text(check_overlap=TRUE) keeps the label of whichever overlapping
    # point comes first in the data, so the most extreme samples must be listed
    # first. Extremeness is the distance of a sample's rank from the median
    # *rank*; the previous code subtracted the median of the PC scores from the
    # ranks and sorted ascending, which did not rank samples by extremeness.
    rank_distance <- function(scores) {
        r <- rank(scores)
        abs(r - median(r))
    }
    ord <- order(rank_distance(df$PC1), rank_distance(df$PC2), decreasing = TRUE)
    df <- df[ord, , drop = FALSE]
    attr(df, "percentVar") <- data.frame(PC=seq_along(percentVar), percentVar=100*percentVar)
    return(df)
}
## Open the PDF that collects the QC plots produced below (onefile=TRUE keeps
## every page in the same file). It is closed by the dev.off() call that
## follows the sample-distance heatmap.
PlotFile <- paste(opt$outprefix,".plots.pdf",sep="")
pdf(file=PlotFile, onefile=TRUE, width=7, height=7)
## PCA
## Two passes: the 500 most variable genes first, then all genes (Inf).
ntop <- c(500, Inf)
for (n_top_var in ntop) {
pca.data <- plotPCA_vst(dds, assay=vst_name, ntop=n_top_var)
## Whole-number percentages, used for the axis labels below.
percentVar <- round(attr(pca.data, "percentVar")$percentVar)
plot_subtitle <- ifelse(n_top_var==Inf, "All genes", paste("Top", n_top_var, "genes"))
## PC1 vs PC2 scatter with one text label per point.
## NOTE(review): `sample` is presumably a column of pca.data coming from
## colData -- confirm against the code that builds `coldata`.
pl <- ggplot(pca.data, aes(PC1, PC2, label=paste0(" ", sample, " "))) +
geom_point() +
geom_text(check_overlap=TRUE, vjust=0.5, hjust="inward") +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) +
theme(legend.position="top",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=1))
print(pl)
## Optional second page per pass. `decompose` and `groupings` are defined
## outside this section of the script.
if (decompose) {
pc_names <- paste0("PC", attr(pca.data, "percentVar")$PC)
## Stack the PC columns into long format: value column `PC`, with the
## component number in `component`.
long_pc <- reshape(pca.data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow")
## Keep the first five components only.
long_pc <- subset(long_pc, component<=5)
## Stack the grouping columns the same way, with the index in `grouper`.
## The plot below reads a `Group` value column, so names(groupings) are
## presumably Group1, Group2, ... -- TODO confirm where `groupings` is built.
long_pc_grp <- reshape(long_pc, varying=names(groupings), direction="long", sep="", timevar="grouper")
## Keep the first five groupings only.
long_pc_grp <- subset(long_pc_grp, grouper<=5)
## Turn both indices into facet labels, e.g. "PC 1" and "1st prefix".
long_pc_grp$component <- paste("PC", long_pc_grp$component)
long_pc_grp$grouper <- paste0(long_pc_grp$grouper, c("st","nd","rd","th","th")[long_pc_grp$grouper], " prefix")
## One facet per component x grouping: points per group value plus a line
## through the group means.
pl <- ggplot(long_pc_grp, aes(x=Group, y=PC)) +
geom_point() +
stat_summary(fun=mean, geom="line", aes(group = 1)) +
labs(x=NULL, y=NULL, subtitle = plot_subtitle, title="PCs split by sample-name prefixes") +
facet_grid(component~grouper, scales="free_x") +
scale_x_discrete(guide = guide_axis(n.dodge = 3))
print(pl)
}
} # after the loop, pca.data and percentVar hold the last pass (ntop = Inf, i.e. all genes)
## EXPORT THE PC1 / PC2 COORDINATES
## One row per sample; each column header carries the rounded share of
## variance of its component, taken from the last PCA pass.
pc_cols <- c("PC1", "PC2")
pca.vals <- pca.data[, pc_cols]
colnames(pca.vals) <- paste0(colnames(pca.vals), ": ", percentVar[1:2], "% variance")
pca.vals <- cbind(sample = rownames(pca.vals), pca.vals)
write.table(
    pca.vals,
    file = paste0(opt$outprefix, ".pca.vals.txt"),
    sep = "\t", quote = TRUE, row.names = FALSE, col.names = TRUE
)
## SAMPLE DISTANCE HEATMAP
## Euclidean distances between samples on the transformed assay; the same
## distance object drives both the row and the column clustering.
sampleDists <- dist(t(assay(dds, vst_name)))
sampleDistMatrix <- as.matrix(sampleDists)
## Reversed 9-step "Blues" palette interpolated to 255 colours.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
heatmap_title <- paste("Euclidean distance between", vst_name, "of samples")
pheatmap(
    sampleDistMatrix,
    clustering_distance_rows = sampleDists,
    clustering_distance_cols = sampleDists,
    col = colors,
    main = heatmap_title
)
## EXPORT THE DISTANCE MATRIX
## Tab-separated, unquoted, with the sample name as the first column.
dist_table <- cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix)
write.table(
    dist_table,
    file = paste0(opt$outprefix, ".sample.dists.txt"),
    sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
)
## Close the plots PDF.
dev.off()
################################################
################################################
## SAVE SIZE FACTORS ##
################################################
################################################
## All size-factor output goes into this directory (created on demand).
SizeFactorsDir <- "size_factors/"
if (!file.exists(SizeFactorsDir)) {
    dir.create(SizeFactorsDir, recursive = TRUE)
}
## Full vector of size factors, saved as an RData file under the name
## `normFactors`.
normFactors <- sizeFactors(dds)
NormFactorsFile <- paste0(SizeFactorsDir, opt$outprefix, ".size_factors.RData")
save(normFactors, file = NormFactorsFile)
## In addition, one plain-text file per sample holding just its size factor.
for (name in names(normFactors)) {
    sizeFactorFile <- paste0(SizeFactorsDir, name, ".txt")
    write(as.numeric(normFactors[name]), file = sizeFactorFile)
}
################################################
################################################
## R SESSION INFO ##
################################################
################################################
## Redirect printed output to the log file just long enough to record the
## session information, then restore normal output.
RLogFile <- "R_sessionInfo.log"
sink(file = RLogFile)
print(sessionInfo())
sink()
################################################
################################################
################################################
################################################

View File

@@ -0,0 +1,277 @@
name: "dupradar"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "Sample ID"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "path to input alignment file in BAM format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf_annotation"
description: "path to GTF annotation file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--paired"
description: "add flag if input alignment file consists of paired reads"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "strandedness of input bam file reads (forward, reverse or unstranded\
\ (default, applicable to paired reads))"
info: null
required: false
choices:
- "forward"
- "reverse"
- "unstranded"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_dupmatrix"
description: "path to output file (txt) of duplicate tag counts"
info: null
default:
- "$id.dup_matrix.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_dup_intercept_mqc"
description: "path to output file (txt) of multiqc intercept value DupRadar"
info: null
default:
- "$id.dup_intercept_mqc.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duprate_exp_boxplot"
description: "path to output file (pdf) of distribution of expression box plot"
info: null
default:
- "$id.duprate_exp_boxplot.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duprate_exp_densplot"
description: "path to output file (pdf) of 2D density scatter plot of duplicate\
\ tag counts"
info: null
default:
- "$id.duprate_exp_densityplot.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duprate_exp_denscurve_mqc"
description: "path to output file (txt) of density curve of gene duplication multiqc"
info: null
default:
- "$id.duprate_exp_density_curve_mqc.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_expression_histogram"
description: "path to output file (pdf) of distribution of RPK values per gene\
\ histogram"
info: null
default:
- "$id.expression_hist.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_intercept_slope"
description: "output file (txt) with progression of duplication rate value"
info: null
default:
- "$id.intercept_slope.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "dupradar.r"
description: "Assessment of duplication rates in RNA-Seq datasets\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
- type: "file"
path: "genes.gtf"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/dupradar.nf"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "r-base"
interactive: false
- type: "r"
bioc:
- "dupRadar"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/dupradar/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/dupradar"
executable: "target/executable/dupradar/dupradar"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env Rscript
# Command line argument processing
# Positional arguments:
#   1 input BAM file          2 sample id, used as output prefix
#   3 GTF annotation file     4 strand direction (0/1/2)
#   5 paired-end flag ("true" means paired; anything else means single)
#   6 number of threads       7 optional extra R library location
args = commandArgs(trailingOnly=TRUE)
# Six arguments are mandatory: the thread count (Arg 6) is validated below and
# has no default, so a call with only five can never succeed.
if (length(args) < 6) {
    stop("Usage: dupRadar.r <input.bam> <sample_id> <annotation.gtf> <strandDirection:0=unstranded/1=forward/2=reverse> <paired:true/false> <nbThreads> <R-package-location (optional)>", call.=FALSE)
}
# Early debug output (stderr) for the raw paired-end flag.
message("paired_end is", args[5])
message("the type is is", class(args[5]))
input_bam <- args[1]
output_prefix <- args[2]
annotation_gtf <- args[3]
stranded <- as.numeric(args[4])
# Only the literal string "true" selects paired-end mode.
paired_end <- args[5] == "true"
threads <- as.numeric(args[6])
bamRegex <- "(.+)\\.bam$"
if(!(grepl(bamRegex, input_bam) && file.exists(input_bam) && (!file.info(input_bam)$isdir))) stop("First argument '<input.bam>' must be an existing file (not a directory) with '.bam' extension...")
if(!(file.exists(annotation_gtf) && (!file.info(annotation_gtf)$isdir))) stop("Third argument '<annotation.gtf>' must be an existing file (and not a directory)...")
if(is.na(stranded) || (!(stranded %in% (0:2)))) stop("Fourth argument <strandDirection> must be a numeric value in 0(unstranded)/1(forward)/2(reverse)...")
if(is.na(threads) || (threads<=0)) stop("Sixth argument <nbThreads> must be a strictly positive numeric value...")
# Debug messages (stderr)
message("Input bam (Arg 1): ", input_bam)
message("Output basename(Arg 2): ", output_prefix)
message("Input gtf (Arg 3): ", annotation_gtf)
message("Strandness (Arg 4): ", c("unstranded", "forward", "reverse")[stranded+1])
message("paired_end (Arg 5): ", paired_end)
message("Nb threads (Arg 6): ", threads)
# The library location is the optional seventh argument.
message("R package loc. (Arg 7): ", if (length(args) > 6) args[7] else "Not specified")
# Load / install packages
# An optional seventh argument names an extra R library location, which is
# searched before the default ones. (Argument 6 is the thread count and must
# not be used as a library path.)
if (length(args) > 6) { .libPaths( c( args[7], .libPaths() ) ) }
if (!require("dupRadar")){
    # Fallback install. The biocLite.R script has been retired by
    # Bioconductor; BiocManager is the supported installer.
    if (!requireNamespace("BiocManager", quietly=TRUE)) {
        install.packages("BiocManager", repos='http://cloud.r-project.org/')
    }
    BiocManager::install("dupRadar", update=FALSE, ask=FALSE)
    library("dupRadar")
}
if (!require("parallel")) {
    install.packages("parallel", dependencies=TRUE, repos='http://cloud.r-project.org/')
    library("parallel")
}
# Duplication matrix for the BAM file, written out as a tab-separated table
# without quoting or row names.
dm <- analyzeDuprates(input_bam, annotation_gtf, stranded, paired_end, threads)
dup_matrix_file <- paste0(output_prefix, "_dupMatrix.txt")
write.table(dm, file=dup_matrix_file, sep="\t", quote=FALSE, row.names=FALSE)
# 2D density scatter plot, titled and tagged with the output prefix
density_pdf <- paste(output_prefix, "_duprateExpDens.pdf", sep="")
pdf(density_pdf)
duprateExpDensPlot(DupMat=dm)
title("Density scatter plot")
mtext(output_prefix, side=3)
dev.off()
# Fit the duplication-rate model and record its intercept and slope, one per
# line, each line labelled with the output prefix.
fit <- duprateExpFit(DupMat=dm)
intercept_line <- paste("- dupRadar Int (duprate at low read counts):", fit$intercept)
slope_line <- paste("- dupRadar Sl (progression of the duplication rate):", fit$slope)
cat(
    intercept_line,
    slope_line,
    file=paste0(output_prefix, "_intercept_slope.txt"),
    fill=TRUE, labels=output_prefix, append=FALSE
)
# Create a multiqc file dupInt
sample_name <- gsub("Aligned.sortedByCoord.out.markDups", "", output_prefix)
line="#id: DupInt
#plot_type: 'generalstats'
#pconfig:
# dupRadar_intercept:
# title: 'dupInt'
# namespace: 'DupRadar'
# description: 'Intercept value from DupRadar'
# max: 100
# min: 0
# scale: 'RdYlGn-rev'
# format: '{:.2f}%'
Sample dupRadar_intercept"
# Write the MultiQC intercept file: the header block first, replacing any
# stale file left by an earlier run (appending would duplicate the header),
# then the sample row.
dup_intercept_file <- paste0(output_prefix, "_dup_intercept_mqc.txt")
write(line, file=dup_intercept_file, append=FALSE)
write(paste(sample_name, fit$intercept), file=dup_intercept_file, append=TRUE)
# Get numbers from dupRadar GLM
curve_x <- sort(log10(dm$RPK))
curve_y = 100*predict(fit$glm, data.frame(x=curve_x), type="response")
# Remove all of the infinite values (log10(0) is -Inf).
# A logical mask is used on purpose: negative indexing with an empty index
# (x[-which(...)] when nothing is infinite) would drop every element.
finite <- is.finite(curve_x)
curve_x = curve_x[finite]
curve_y = curve_y[finite]
# Reduce number of data points: keep elements 1, 11, 21, ...
# length.out keeps this valid for an empty vector, where seq(1, 0, 10) errors.
keep <- seq(1, by = 10, length.out = ceiling(length(curve_x) / 10))
curve_x <- curve_x[keep]
curve_y <- curve_y[keep]
# Convert x values back to real counts
curve_x = 10^curve_x
# Write to file
line="#id: dupradar
#section_name: 'DupRadar'
#section_href: 'bioconductor.org/packages/release/bioc/html/dupRadar.html'
#description: \"provides duplication rate quality control for RNA-Seq datasets. Highly expressed genes can be expected to have a lot of duplicate reads, but high numbers of duplicates at low read counts can indicate low library complexity with technical duplication.
# This plot shows the general linear models - a summary of the gene duplication distributions. \"
#pconfig:
# title: 'DupRadar General Linear Model'
# xLog: True
# xlab: 'expression (reads/kbp)'
# ylab: '% duplicate reads'
# ymax: 100
# ymin: 0
# tt_label: '<b>{point.x:.1f} reads/kbp</b>: {point.y:,.2f}% duplicates'
# xPlotLines:
# - color: 'green'
# dashStyle: 'LongDash'
# label:
# style: {color: 'green'}
# text: '0.5 RPKM'
# verticalAlign: 'bottom'
# y: -65
# value: 0.5
# width: 1
# - color: 'red'
# dashStyle: 'LongDash'
# label:
# style: {color: 'red'}
# text: '1 read/bp'
# verticalAlign: 'bottom'
# y: -65
# value: 1000
# width: 1"
# Write the MultiQC curve file: the header block first, replacing any stale
# file left by an earlier run (appending would duplicate the header), then the
# x/y pairs as unquoted rows without row or column names.
curve_file <- paste0(output_prefix, "_duprateExpDensCurve_mqc.txt")
write(line, file=curve_file, append=FALSE)
write.table(
    cbind(curve_x, curve_y),
    file=curve_file,
    quote=FALSE, row.names=FALSE, col.names=FALSE, append=TRUE
)
# Box plot of duplication rate by expression, tagged with the output prefix
boxplot_pdf <- paste(output_prefix, "_duprateExpBoxplot.pdf", sep="")
pdf(boxplot_pdf)
duprateExpBoxplot(DupMat=dm)
title("Percent Duplication by Expression")
mtext(output_prefix, side=3)
dev.off()
# Histogram of RPK values per gene, tagged with the output prefix
hist_pdf <- paste(output_prefix, "_expressionHist.pdf", sep="")
pdf(hist_pdf)
expressionHist(DupMat=dm)
title("Distribution of RPK values per gene")
mtext(output_prefix, side=3)
dev.off()
# Report the prefix, the dupRadar citation and the session details on stdout
print(output_prefix)
print(citation("dupRadar"))
print(sessionInfo())

View File

@@ -0,0 +1,209 @@
name: "fastqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "Input fastq files, either one or two (paired)"
info: null
example:
- "sample.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ","
- name: "Output"
arguments:
- type: "file"
name: "--fastqc_html_1"
description: "FastQC HTML report for read 1."
info: null
default:
- "$id.read_1.fastqc.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_html_2"
description: "FastQC HTML report for read 2."
info: null
default:
- "$id.read_2.fastqc.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_zip_1"
description: "FastQC report archive for read 1."
info: null
default:
- "$id.read_1.fastqc.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_zip_2"
description: "FastQC report archive for read 2."
info: null
default:
- "$id.read_2.fastqc.zip"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\
\ This component can take one or more files (by means of shell globbing) or a complete\
\ directory.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/fastqc/main.nf"
- "modules/nf-core/fastqc/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "fastqc"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/fastqc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/fastqc"
executable: "target/executable/fastqc/fastqc"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

1271
target/executable/fastqc/fastqc Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,188 @@
name: "fq_subsample"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "Input fastq files to subsample"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--extra_args"
description: "Extra arguments to pass to fq subsample"
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Output arguments: subsampled FASTQ files for read 1 and read 2
# (the read 2 file is not required to exist).
- name: "Output"
arguments:
- type: "file"
name: "--output_1"
description: "Sampled read 1 fastq files"
info: null
default:
- "$id.read_1.subsampled.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_2"
description: "Sampled read 2 fastq files"
info: null
default:
- "$id.read_2.subsampled.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "fq subsample outputs a subset of records from single or paired FASTQ\
\ files. This requires a seed (--seed) to be set in --extra_args\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/fq/subsample/main.nf"
- "modules/nf-core/fq/subsample/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\
\ && \\\napt-get update && \\\napt-get install -y --no-install-recommends build-essential\
\ git-all curl && \\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\n.\
\ \"$HOME/.cargo/env\" && \\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git\
\ && \\\nmv fq /usr/local/ && cd /usr/local/fq && \\\ncargo install --locked\
\ --path . && \\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n"
env:
- "TZ=Europe/Brussels"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/fq_subsample/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/fq_subsample"
executable: "target/executable/fq_subsample/fq_subsample"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,178 @@
name: "getchromsizes"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--fasta"
description: "Genome fasta files"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--sizes"
description: "File containing chromosome lengths"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai"
description: "FASTA index file"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gzi"
description: "Optional gzip index file for compressed inputs"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Generates a file of chromosome sizes and a FASTA index file.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genome.fasta"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/custom/getchromsizes/main.nf"
- "modules/nf-core/custom/getchromsizes/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt-get update && \\\napt-get install -y autoconf automake make gcc perl zlib1g-dev\
\ libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libncurses5-dev curl\
\ bzip2 && \\\ncurl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2\
\ -o samtools-1.18.tar.bz2 && \\\ntar -xjf samtools-1.18.tar.bz2 && \\\nrm samtools-1.18.tar.bz2\
\ && \\\ncd samtools-1.18 && \\\n./configure && \\\nmake && \\\nmake install\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/getchromsizes/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/getchromsizes"
executable: "target/executable/getchromsizes/getchromsizes"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,156 @@
name: "gtf2bed"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--gtf"
description: "A reference file in GTF format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--bed_output"
description: "BED file resulting from the conversion of the GTF input file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "gtf2bed.pl"
description: "Create BED annotation file from GTF.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genes.gtf.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/gtf2bed.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "perl"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/gtf2bed/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/gtf2bed"
executable: "target/executable/gtf2bed/gtf2bed"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

1078
target/executable/gtf2bed/gtf2bed Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env perl
# Copyright (c) 2011 Erik Aronesty (erik@q32.com)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT.
use Getopt::Long;

# gtf2bed: convert a GTF (or GFF2/GFF3) annotation into 12-column BED on STDOUT.
#
# Usage: gtf2bed.pl [-x] <annotation[.gz|.zip]>
#   -x   append the gene name (falling back to the gene id) as an extra column
#
# One BED line is printed per transcript id (or miRNA feature). Exons become the
# BED blocks; start_codon/stop_codon features, when present, become the "thick"
# region.

my $extended;
GetOptions("x" => \$extended);

my $in = shift @ARGV;
die "Usage: $0 [-x] <annotation.gtf[.gz|.zip]>\n" unless defined $in && length $in;

# Open the input and fail loudly when that is not possible. The list form of the
# pipe open hands the file name to gunzip/unzip as a single argument, so names
# containing spaces or shell metacharacters are safe.
if ($in =~ /\.gz$/) {
    open(IN, "-|", "gunzip", "-c", $in) or die "Can't run gunzip on $in: $!\n";
} elsif ($in =~ /\.zip$/) {
    open(IN, "-|", "unzip", "-p", $in) or die "Can't run unzip on $in: $!\n";
} elsif ($in eq '-') {
    # "-" keeps meaning standard input, as it did with the former 2-arg open
    open(IN, "<&", \*STDIN) or die "Can't read standard input: $!\n";
} else {
    open(IN, "<", $in) or die "Can't open $in: $!\n";
}

while (<IN>) {
    # a "##gff-version" pragma switches attribute parsing from GTF to GFF
    $gff = 2 if /^##gff-version 2/;
    $gff = 3 if /^##gff-version 3/;
    next if /^#/ && $gff;

    s/\s+$//;
    # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr
    my @f = split /\t/;

    if ($gff) {
        # most ver 2's stick gene names in the id field
        ($id) = $f[8] =~ /\bID="([^"]+)"/;
        # most ver 3's stick unquoted names in the name field
        ($id) = $f[8] =~ /\bName=([^";]+)/ if !$id && $gff == 3;
    } else {
        ($id) = $f[8] =~ /transcript_id "([^"]+)"/;
    }

    # lines without an id or a chromosome (GTF comments included) are ignored
    next unless $id && $f[0];

    if ($f[2] eq 'exon') {
        die "no position at exon on line $." if ! $f[3];
        # gff3 puts :\d in exons sometimes
        $id =~ s/:\d+$// if $gff == 3;
        push @{$exons{$id}}, \@f;
        # keep the first record seen for this id; it supplies chr, strand,
        # attributes and the position used for output ordering
        $trans{$id} = \@f if !$trans{$id};
    } elsif ($f[2] eq 'start_codon') {
        # optional, output codon start/stop as "thick" region in bed
        $sc{$id}->[0] = $f[3];
    } elsif ($f[2] eq 'stop_codon') {
        $sc{$id}->[1] = $f[4];
    } elsif ($f[2] eq 'miRNA') {
        $trans{$id} = \@f if !$trans{$id};
        push @{$exons{$id}}, \@f;
    }
}

# For piped input a false close means the decompressor reported a problem; warn
# rather than die, because gunzip also exits non-zero for mere warnings.
close(IN)
    or warn "Warning: problem closing input $in: "
        . ($! ? $! : "child exited with status " . ($? >> 8)) . "\n";

# sort by chr then pos
my @ordered = sort {
    $trans{$a}->[0] eq $trans{$b}->[0]
        ? $trans{$a}->[3] <=> $trans{$b}->[3]
        : $trans{$a}->[0] cmp $trans{$b}->[0]
} keys %trans;

for my $tid (@ordered) {
    my ($chr, $dir, $attr) = @{$trans{$tid}}[0, 6, 8];

    # thick start/end come from the start/stop codons when they were seen
    my ($cds, $cde);
    ($cds, $cde) = @{$sc{$tid}} if $sc{$tid};

    # sort by pos
    my @ex = sort { $a->[3] <=> $b->[3] } @{$exons{$tid}};
    my $beg = $ex[0][3];
    my $end = $ex[-1][4];

    if ($dir eq '-') {
        # on the minus strand the codon roles are swapped, then widened by 2
        ($cds, $cde) = ($cde, $cds);
        $cds -= 2 if $cds;
        $cde += 2 if $cde;
    }

    # not specified, just use exons
    $cds = $beg if !$cds;
    $cde = $end if !$cde;

    # adjust start for bed (0-based starts)
    --$beg;
    --$cds;

    my $exn  = @ex;                                      # exon count
    my $exst = join ",", map { $_->[3] - $beg - 1 } @ex; # exon starts relative to $beg
    my $exsz = join ",", map { $_->[4] - $_->[3] + 1 } @ex; # exon sizes

    my $gene_id;
    my $extend = "";
    if ($extended) {
        ($gene_id) = $attr =~ /gene_name "([^"]+)"/;
        ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id;
        $extend = "\t$gene_id";
    }

    # added an extra comma to make it look exactly like ucsc's beds
    print "$chr\t$beg\t$end\t$tid\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n";
}

View File

@@ -0,0 +1,166 @@
name: "gtf_filter"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--fasta"
description: "Genome fasta file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "GTF file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--skip_transcript_id_check"
description: "Skip checking for transcript IDs in the GTF file."
info: null
direction: "input"
- name: "Output"
arguments:
- type: "file"
name: "--filtered_gtf"
description: "Filtered GTF file containing only sequences in the FASTA file"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
description: "Filters a GTF file based on sequence names in a FASTA file.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genome.fasta"
- type: "file"
path: "genes.gtf.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/gtf_filter.nf"
last_sha: "1c6012ecbb087014ea4b8f0f3d39b874850277a8"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/gtf_filter/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/gtf_filter"
executable: "target/executable/gtf_filter/gtf_filter"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,155 @@
name: "gunzip"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "Path of file to be uncompressed"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
description: "Decompressed file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Decompress a gzip-compressed file.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genes.gff.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/gunzip/main.nf"
- "modules/nf-core/gunzip/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "gzip"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/gunzip/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/gunzip"
executable: "target/executable/gunzip/gunzip"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

1084
target/executable/gunzip/gunzip Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,166 @@
name: "kallisto_index"
namespace: "kallisto"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--transcriptome_fasta"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--pseudo_aligner_kmer_size"
description: "Kmer length passed to indexing step of pseudoaligners."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--kallisto_index"
info: null
default:
- "Kallisto_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Create Kallisto index.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "transcriptome.fasta"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/kallisto/index/main.nf"
- "modules/nf-core/kallisto/index/meta.yml"
last_sha: "c0816976384d5e7ee6079c29c45958df1ffa0ee4"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
# Installs the kallisto v0.50.1 release binary into /usr/local/bin.
# ca-certificates is installed so wget can verify GitHub's TLS certificate;
# the download is an executable, so verification must not be disabled.
- type: "docker"
  run:
  - "apt-get update && \\\napt-get install -y --no-install-recommends wget ca-certificates\
    \ && \\\nwget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\
    \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\
    \ /usr/local/bin/\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/kallisto/kallisto_index/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/kallisto/kallisto_index"
executable: "target/executable/kallisto/kallisto_index/kallisto_index"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,264 @@
name: "kallisto_quant"
namespace: "kallisto"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "List of input FastQ files of size 1 and 2 for single-end and paired-end\
\ data, respectively."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "boolean"
name: "--paired"
description: "Paired reads or not."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--index"
description: "Kallisto genome index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "Optional gtf file for translation of transcripts into genomic coordinates."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chromosomes"
description: "Optional tab separated file with chromosome names and lengths."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length"
description: "For single-end mode only, the estimated average fragment length."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_sd"
description: "For single-end mode only, the estimated standard deviation of the\
\ fragment length."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
description: "Kallisto quant results"
info: null
default:
- "$id.kallisto_quant_results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--log"
description: "File containing log information from running kallisto quant"
info: null
default:
- "$id.kallisto_quant.log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--run_info"
description: "A json file containing information about the run"
info: null
default:
- "$id.run_info.json"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--quant_results_file"
description: "TSV file containing abundance estimates from Kallisto"
info: null
default:
- "$id.abundance.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Computes equivalence classes for reads and quantifies abundances.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "transcriptome.fasta"
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/kallisto/quant/main.nf"
- "modules/nf-core/kallisto/quant/meta.yml"
last_sha: "aff1d2e02717247831644769fc3ba84868c3fdde"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
# Installs the kallisto v0.50.1 release binary into /usr/local/bin.
# ca-certificates is installed so wget can verify GitHub's TLS certificate;
# the download is an executable, so verification must not be disabled.
- type: "docker"
  run:
  - "apt-get update && \\\napt-get install -y --no-install-recommends wget ca-certificates\
    \ && \\\nwget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\
    \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\
    \ /usr/local/bin/\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/kallisto/kallisto_quant/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/kallisto/kallisto_quant"
executable: "target/executable/kallisto/kallisto_quant/kallisto_quant"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
name: "multiqc_custom_biotype"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--biocounts"
description: "File with all biocounts"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--id"
description: "Sample name"
info: null
default:
- "$id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--biotypes_header"
info: null
default:
- "assets/multiqc/biotypes_header.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--featurecounts_multiqc"
info: null
default:
- "$id.biotype_counts_mqc.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--featurecounts_rrna_multiqc"
info: null
default:
- "$id.biotype_counts_rrna_mqc.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "mqc_features_stat.py"
description: "Calculate features percentage for biotype counts"
info: null
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
interactive: false
- type: "python"
user: false
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/multiqc_custom_biotype/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/multiqc_custom_biotype"
executable: "target/executable/multiqc_custom_biotype/multiqc_custom_biotype"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import argparse
import logging
import os
# Create a logger
logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)
mqc_main = """#id: 'biotype-gs'
#plot_type: 'generalstats'
#pconfig:"""
mqc_pconf = """# percent_{ft}:
# title: '% {ft}'
# namespace: 'Biotype Counts'
# description: '% reads overlapping {ft} features'
# max: 100
# min: 0
# scale: 'RdYlGn-rev'
# format: '{{:.2f}}%'"""
def mqc_feature_stat(bfile, features, outfile, sname=None):
    """Write a MultiQC table with the percentage of counts per requested feature.

    Args:
        bfile: Path to a tab-separated count file with one
            ``feature<TAB>count`` record per line. Lines starting with ``#``
            and blank lines are ignored.
        features: Feature names to report. A feature that does not occur in
            ``bfile`` is reported as 0.
        outfile: Path of the file to write.
        sname: Sample name used in the value row. Defaults to the base name
            of ``bfile`` without its extension.

    Returns:
        None. If the count file cannot be read or parsed, the total count is
        zero, or no features are given, an error is logged and no file is
        written.
    """
    # If sample name not given use file name
    if not sname:
        sname = os.path.splitext(os.path.basename(bfile))[0]

    # Try to parse and read biocount file
    fcounts = {}
    try:
        with open(bfile, "r") as bfl:
            for ln in bfl:
                # Skip comments and blank lines; a trailing empty line must
                # not make the whole file count as unreadable.
                if ln.startswith("#") or not ln.strip():
                    continue
                ft, cn = ln.strip().split("\t")
                fcounts[ft] = float(cn)
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: wrong column count,
        # non-numeric count or undecodable bytes. Anything else (including
        # KeyboardInterrupt) is deliberately allowed to propagate.
        logger.error("Trouble reading the biocount file {}".format(bfile))
        return

    total_count = sum(fcounts.values())
    if total_count == 0:
        logger.error("No biocounts found, exiting")
        return

    # Calculate percentage for each requested feature (0 when absent)
    fpercent = {f: (fcounts[f] / total_count) * 100 if f in fcounts else 0 for f in features}
    if len(fpercent) == 0:
        # Only reachable with an empty feature list, since absent features
        # are still reported as 0 above.
        logger.error("No features given to look up in the biocount file {}".format(bfile))
        return

    # Prepare the output strings: MultiQC header, column names, sample row
    out_mqc = "\n".join([mqc_main] + [mqc_pconf.format(ft=ft) for ft in fpercent])
    out_head = "\t".join(["Sample"] + ["percent_{}".format(ft) for ft in fpercent])
    out_value = "\t".join(["'{}'".format(sname)] + [str(pt) for pt in fpercent.values()])

    # Write the output to a file
    with open(outfile, "w") as ofl:
        out_final = "\n".join([out_mqc, out_head, out_value]).strip()
        ofl.write(out_final + "\n")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to
    # mqc_feature_stat(), which writes the percentage table.
    parser = argparse.ArgumentParser(description="""Calculate features percentage for biotype counts""")
    parser.add_argument("biocount", type=str, help="File with all biocounts")
    parser.add_argument(
        "-f",
        "--features",
        dest="features",
        required=True,
        nargs="+",
        help="Features to count",
    )
    parser.add_argument("-s", "--sample", dest="sample", type=str, help="Sample Name")
    parser.add_argument(
        "-o",
        "--output",
        dest="output",
        default="biocount_percent.tsv",
        type=str,
        # Previously a copy-paste of the --sample help text.
        help="Output file name",
    )
    args = parser.parse_args()
    mqc_feature_stat(args.biocount, args.features, args.output, args.sample)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,218 @@
name: "picard_markduplicates"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--bam"
description: "Input BAM file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fasta"
description: "Reference genome FASTA file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai"
description: "Reference genome FASTA index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_picard_args"
description: "Additional arguments to be passed to Picard MarkDuplicates"
info: null
default:
- "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT\
\ --TMP_DIR tmp"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_bam"
description: "BAM file with duplicate reads marked/removed"
info: null
default:
- "$id.MarkDuplicates.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "An optional BAM index file. If desired, --CREATE_INDEX must be passed\
\ as a flag"
info: null
default:
- "$id.MarkDuplicates.bam.bai"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--metrics"
description: "Duplicate metrics file generated by picard"
info: null
default:
- "$id.MarkDuplicates.metrics.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Locate and tag duplicate reads in a BAM file\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
- type: "file"
path: "genome.fasta"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/picard/markduplicates/main.nf"
- "modules/nf-core/picard/markduplicates/meta.yml"
last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
# Installs a Java runtime and downloads the Picard 3.1.1 jar into
# /usr/local/bin; PICARD points at the jar.
# TLS verification stays enabled for the download (no --no-check-certificate);
# ca-certificates is installed explicitly so wget can verify github.com.
- type: "docker"
  run:
  - |
    apt-get update && \
    apt-get install -y build-essential ca-certificates openjdk-17-jdk wget && \
    wget https://github.com/broadinstitute/picard/releases/download/3.1.1/picard.jar && \
    mv picard.jar /usr/local/bin
  env:
  - "PICARD=/usr/local/bin/picard.jar"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/picard_markduplicates/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/picard_markduplicates"
executable: "target/executable/picard_markduplicates/picard_markduplicates"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,420 @@
name: "prepare_multiqc_input"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--fail_trimming_multiqc"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--fail_mapping_multiqc"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--fail_strand_multiqc"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_raw_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--fastqc_trim_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--trim_log_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--sortmerna_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--star_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--salmon_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--samtools_stats"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--samtools_flagstat"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--samtools_idxstats"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--markduplicates_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--pseudo_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--featurecounts_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--featurecounts_rrna_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--aligner_pca_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--aligner_clustering_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--pseudo_aligner_pca_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--pseudo_aligner_clustering_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--preseq_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--qualimap_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--dupradar_output_dup_intercept_mqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--dupradar_output_duprate_exp_denscurve_mqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--bamstat_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--inferexperiment_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--innerdistance_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--junctionannotation_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--junctionsaturation_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--readdistribution_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--readduplication_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--tin_multiqc"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--multiqc_config"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "multiqc_input"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Prepare directory with all the input files for MultiQC.\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/prepare_multiqc_input/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/prepare_multiqc_input"
executable: "target/executable/prepare_multiqc_input/prepare_multiqc_input"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
name: "preprocess_transcripts_fasta"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--transcript_fasta"
description: "Path of transcripts FASTA file"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
description: "Path of processed output FASTA file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Process transcripts FASTA if GTF file is GENCODE format\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "transcriptome.fasta"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/preprocess_transcripts_fasta_gencode.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/preprocess_transcripts_fasta/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/preprocess_transcripts_fasta"
executable: "target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,202 @@
name: "preseq_lcextrap"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "Input genome BAM/BED file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_preseq_args"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--paired"
description: "Paired-end reads?"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "$id.lc_extrap.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Computing the expected future yield of distinct reads and bounds on\
\ the number of total distinct reads in the library and the associated confidence\
\ intervals."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "a.sorted.bed"
- type: "file"
path: "SRR1106616_5M_subset.bam"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/preseq/lcextrap/main.nf"
- "modules/nf-core/preseq/lcextrap/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "curl"
- "bzip2"
- "build-essential"
- "wget"
- "gcc"
- "autoconf"
- "automake"
- "make"
- "libz-dev"
- "libbz2-dev"
- "zlib1g-dev"
- "libncurses5-dev"
- "libncursesw5-dev"
- "liblzma-dev"
- "pip"
interactive: false
# Downloads preseq 3.2.0, htslib 1.9, a static bedtools 2.31.0 binary and
# samtools 1.18 into /usr/bin, then builds samtools, htslib and preseq.
# All downloads verify TLS certificates: the htslib and samtools downloads
# from the same host already did, so --no-check-certificate was not needed.
- type: "docker"
  run:
  - |
    cd /usr/bin && \
    wget https://github.com/smithlabcode/preseq/releases/download/v3.2.0/preseq-3.2.0.tar.gz && \
    wget https://github.com/samtools/htslib/releases/download/1.9/htslib-1.9.tar.bz2 && \
    wget https://github.com/arq5x/bedtools2/releases/download/v2.31.0/bedtools.static && \
    curl -fsSL https://github.com/samtools/samtools/releases/download/1.18/samtools-1.18.tar.bz2 -o samtools-1.18.tar.bz2 && \
    tar -xjf samtools-1.18.tar.bz2 && rm samtools-1.18.tar.bz2 && \
    tar -xzf preseq-3.2.0.tar.gz && rm preseq-3.2.0.tar.gz && \
    tar -vxjf htslib-1.9.tar.bz2 && rm htslib-1.9.tar.bz2 && \
    mv bedtools.static /usr/local/bin/bedtools && \
    chmod a+x /usr/local/bin/bedtools && \
    cd samtools-1.18 && \
    ./configure && \
    make && \
    make install && \
    cd /usr/bin && cd htslib-1.9 && \
    make && \
    cd /usr/bin && cd preseq-3.2.0 && \
    mkdir build && cd build && \
    ../configure && \
    make && make install && make HAVE_HTSLIB=1 all
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/preseq_lcextrap/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/preseq_lcextrap"
executable: "target/executable/preseq_lcextrap/preseq_lcextrap"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,282 @@
name: "qualimap"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "path to input mapping file in BAM format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "path to annotations file in Ensembl GTF format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_dir"
description: "path to output directory for raw data and report."
info: null
default:
- "$id.qualimap_output"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_pdf"
description: "path to output file for pdf report."
info: null
default:
- "$id.report.pdf"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_format"
description: "Format of the output report (PDF or HTML, default is HTML)"
info: null
default:
- "html"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Optional"
arguments:
- type: "integer"
name: "--pr_bases"
description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\
\ bias (default = 100)."
info: null
default:
- 100
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--tr_bias"
description: "Number of top highly expressed transcripts to compute 5'-3' bias\
\ (default = 1000)."
info: null
default:
- 1000
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--algorithm"
description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)."
info: null
default:
- "uniquely-mapped-reads"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sequencing_protocol"
description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\
\ or non-strand-specific (default))."
info: null
default:
- "non-strand-specific"
required: false
choices:
- "non-strand-specific"
- "strand-specific-reverse"
- "strand-specific-forward"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--paired"
description: "Setting this flag for paired-end experiments will result in counting\
\ fragments instead of reads."
info: null
direction: "input"
- type: "boolean_true"
name: "--sorted"
description: "Setting this flag indicates that the input file is already sorted\
\ by name. If flag is not set, additional sorting by name will be performed.\
\ Only required for paired-end analysis."
info: null
direction: "input"
- type: "string"
name: "--java_memory_size"
description: "maximum Java heap memory size, default = 4G."
info: null
default:
- "4G"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "RNA-seq QC analysis using Qualimap.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai"
- type: "file"
path: "genes.gtf"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/qualimap/rnaseq/main.nf"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "r-base"
- "unzip"
- "wget"
- "openjdk-8-jdk"
- "libxml2-dev"
- "libcurl4-openssl-dev"
interactive: false
# Downloads Qualimap 2.3 and copies its contents into /usr/bin.
# The copy target is absolute so the step no longer depends on the build
# working directory being "/".
# NOTE(review): `unset DISPLAY` and `export _JAVA_OPTIONS` only affect this
# build step's shell and are not stored in the image — confirm that script.sh
# sets them again at run time.
- type: "docker"
  run:
  - |
    wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \
    unzip qualimap_v2.3.zip && \
    cp -a qualimap_v2.3/. /usr/bin && \
    unset DISPLAY && \
    mkdir -p tmp && \
    export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp
# R packages installed into the image.
# The Bioconductor package is named "NOISeq"; the previous "NOISeqr" does not
# match a Bioconductor package name.
- type: "r"
  cran:
  - "optparse"
  bioc:
  - "NOISeq"
  bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/qualimap/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/qualimap"
executable: "target/executable/qualimap/qualimap"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,319 @@
# Component identity and command-line interface of rsem_calculate_expression.
name: "rsem_calculate_expression"
namespace: "rsem"
version: "main"
argument_groups:
# Inputs: sample metadata, reads, the RSEM index and pass-through options.
- name: "Input"
  arguments:
  - type: "string"
    name: "--id"
    description: "Sample ID."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--strandedness"
    description: "Sample strand-specificity. Must be one of unstranded, forward, reverse"
    info: null
    required: false
    choices:
    - "forward"
    - "reverse"
    - "unstranded"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--paired"
    description: "Paired-end reads or not?"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Accepts several files, separated by "," on the command line.
  - type: "file"
    name: "--input"
    description: "Input reads for quantification."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ","
  - type: "file"
    name: "--index"
    description: "RSEM index."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--extra_args"
    description: "Extra rsem-calculate-expression arguments in addition to the defaults."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # No description is given for this argument; the file is not required to
  # exist (must_exist: false).
  - type: "file"
    name: "--versions"
    info: null
    must_exist: false
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Outputs: quantification tables, statistics, logs and alignment files.
- name: "Output"
  arguments:
  - type: "file"
    name: "--counts_gene"
    description: "Expression counts on gene level"
    info: null
    example:
    - "sample.genes.results"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--counts_transcripts"
    description: "Expression counts on transcript level"
    info: null
    example:
    - "sample.isoforms.results"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--stat"
    description: "RSEM statistics"
    info: null
    example:
    - "sample.stat"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--logs"
    description: "RSEM logs"
    info: null
    example:
    - "sample.log"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # The three BAM outputs below are described as optional, so the
  # post-run existence check is disabled for them: a run that does not
  # produce one of these files must not be reported as a failure.
  - type: "file"
    name: "--bam_star"
    description: "BAM file generated by STAR (optional)"
    info: null
    example:
    - "sample.STAR.genome.bam"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bam_genome"
    description: "Genome BAM file (optional)"
    info: null
    example:
    - "sample.genome.bam"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bam_transcript"
    description: "Transcript BAM file (optional)"
    info: null
    example:
    - "sample.transcript.bam"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Calculate expression with RSEM.\n"
# Test script and the data files it ships with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "SRR6357070_1.fastq.gz"
- type: "file"
  path: "SRR6357070_2.fastq.gz"
- type: "file"
  path: "rsem.tar.gz"
# Pointer to the nf-core module files and commit recorded as migration info.
info:
  migration_info:
    git_repo: "https://github.com/nf-core/rnaseq.git"
    paths:
    - "modules/nf-core/rsem/calculateexpression/main.nf"
    - "modules/nf-core/rsem/calculateexpression/meta.yml"
    last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7"
status: "enabled"
# Commands that must be available where the component runs.
requirements:
  commands:
  - "ps"
runners:
# Runner 1: standalone executable.
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
# Runner 2: Nextflow module.
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> directive string.
    labels:
      # Memory in decimal units (1 GB = 1000^3 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (1 GiB = 1024^3 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: Ubuntu base image in which STAR and RSEM are built from
# source at the versions pinned in `env` below.
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "build-essential"
    - "gcc"
    - "g++"
    - "make"
    - "wget"
    # Root certificates so the source archives can be fetched with TLS
    # verification enabled.
    - "ca-certificates"
    - "zlib1g-dev"
    - "unzip"
    - "xxd"
    - "perl"
    - "r-base"
    - "bowtie2"
    - "python3-pip"
    - "git"
    interactive: false
  # Build steps: set the timezone, download + compile STAR, download +
  # compile + install RSEM, remove the build trees, then print PATH and the
  # STAR location. The downloads verify the server certificate, since the
  # fetched sources are compiled into the image.
  - type: "docker"
    run:
    - |
      ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
      cd /tmp && \
      wget https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
      unzip ${STAR_VERSION}.zip && \
      cd STAR-${STAR_VERSION}/source && \
      make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
      cp STAR /usr/local/bin && \
      cd /tmp && \
      wget https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \
      unzip v${RSEM_VERSION}.zip && \
      cd RSEM-${RSEM_VERSION} && \
      make && \
      make install && \
      rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
      rm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \
      cd && \
      apt-get clean && \
      echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \
      echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \
      /bin/bash -c "source /etc/profile && source ~/.bashrc && echo $PATH && which STAR"
    # Variables referenced by the build steps above.
    env:
    - "STAR_VERSION=2.7.11b"
    - "RSEM_VERSION=1.3.3"
    - "TZ=Europe/Brussels"
  entrypoint: []
  cmd: null
# Native engine: run directly on the host without a container.
- type: "native"
  id: "native"
# Build record: source config, runner/engine used, output locations and the
# git commit/remote of the build.
build_info:
  config: "src/rsem/rsem_calculate_expression/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/rsem/rsem_calculate_expression"
  executable: "target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression"
  viash_version: "0.9.0"
  git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
  git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings that accompany this component.
package_config:
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  # Config modifications recorded for the package, one expression per entry.
  config_mods:
  - |
    .requirements.commands := ['ps']
    .runners[.type == 'nextflow'].directives.tag := '$id'
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,213 @@
# Component identity and command-line interface of rsem_merge_counts.
name: "rsem_merge_counts"
namespace: "rsem"
version: "main"
argument_groups:
# Inputs: gene-level and transcript-level expression counts.
- name: "Input"
  arguments:
  - type: "file"
    name: "--counts_gene"
    description: "Expression counts on gene level (genes)"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--counts_transcripts"
    description: "Expression counts on transcript level (isoforms)"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # No description is given for this argument; the file is not required to
  # exist (must_exist: false).
  - type: "file"
    name: "--versions"
    info: null
    must_exist: false
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Outputs: one merged table per (gene | transcript) x (counts | TPM), each
# with a default file name.
- name: "Output"
  arguments:
  - type: "file"
    name: "--merged_gene_counts"
    description: "File containing gene counts across all samples."
    info: null
    default:
    - "rsem.merged.gene_counts.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--merged_gene_tpm"
    description: "File containing gene TPM across all samples."
    info: null
    default:
    - "rsem.merged.gene_tpm.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--merged_transcript_counts"
    description: "File containing transcript counts across all samples."
    info: null
    default:
    - "rsem.merged.transcript_counts.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--merged_transcript_tpm"
    description: "File containing transcript TPM across all samples."
    info: null
    default:
    - "rsem.merged.transcript_tpm.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # No description is given for this argument; defaults to "versions.yml".
  - type: "file"
    name: "--updated_versions"
    info: null
    default:
    - "versions.yml"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Merge the transcript quantification results obtained from rsem calculate-expression\
  \ across all samples.\n"
# Pointer to the nf-core module file and commit recorded as migration info.
info:
  migration_info:
    git_repo: "https://github.com/nf-core/rnaseq.git"
    paths:
    - "modules/local/rsem_merge_counts/main.nf"
    last_sha: "311279532694ce7520164ce4d65a388c0cd11f60"
status: "enabled"
# Commands that must be available where the component runs.
requirements:
  commands:
  - "ps"
runners:
# Runner 1: standalone executable.
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
# Runner 2: Nextflow module.
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> directive string.
    labels:
      # Memory in decimal units (1 GB = 1000^3 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (1 GiB = 1024^3 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: plain Ubuntu base image; no setup steps are listed.
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  entrypoint: []
  cmd: null
# Native engine: run directly on the host without a container.
- type: "native"
  id: "native"
# Build record: source config, runner/engine used, output locations and the
# git commit/remote of the build.
build_info:
  config: "src/rsem/rsem_merge_counts/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/rsem/rsem_merge_counts"
  executable: "target/executable/rsem/rsem_merge_counts/rsem_merge_counts"
  viash_version: "0.9.0"
  git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
  git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings that accompany this component.
package_config:
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  # Config modifications recorded for the package, one expression per entry.
  config_mods:
  - |
    .requirements.commands := ['ps']
    .runners[.type == 'nextflow'].directives.tag := '$id'
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,174 @@
# Component identity and command-line interface of rseqc_bamstat.
name: "rseqc_bamstat"
namespace: "rseqc"
version: "main"
argument_groups:
# Inputs: the alignment file (required) and a mapping-quality threshold.
- name: "Input"
  arguments:
  - type: "file"
    name: "--input"
    description: "input alignment file in BAM or SAM format"
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Non-negative integer (min: 0), 30 when not given.
  - type: "integer"
    name: "--map_qual"
    description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
      \ reads, default=30."
    info: null
    default:
    - 30
    required: false
    min: 0
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Output: a single text report whose default name is derived from "$id".
- name: "Output"
  arguments:
  - type: "file"
    name: "--output"
    description: "output file (txt) with mapping quality statistics"
    info: null
    default:
    - "$id.mapping_quality.txt"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Generate statistics from a bam file.\n"
# Test script and the data file it ships with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test.paired_end.sorted.bam"
# Pointer to the nf-core module file and commit recorded as migration info.
info:
  migration_info:
    git_repo: "https://github.com/nf-core/rnaseq.git"
    paths:
    - "modules/nf-core/rseqc/bamstat/main.nf"
    last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
# Commands that must be available where the component runs.
requirements:
  commands:
  - "ps"
runners:
# Runner 1: standalone executable.
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
# Runner 2: Nextflow module.
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> directive string.
    labels:
      # Memory in decimal units (1 GB = 1000^3 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (1 GiB = 1024^3 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: Ubuntu base image with pip and the RSeQC Python package.
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "python3-pip"
    interactive: false
  - type: "python"
    user: false
    packages:
    - "RSeQC"
    upgrade: true
  entrypoint: []
  cmd: null
# Native engine: run directly on the host without a container.
- type: "native"
  id: "native"
# Build record: source config, runner/engine used, output locations and the
# git commit/remote of the build.
build_info:
  config: "src/rseqc/rseqc_bamstat/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/rseqc/rseqc_bamstat"
  executable: "target/executable/rseqc/rseqc_bamstat/rseqc_bamstat"
  viash_version: "0.9.0"
  git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
  git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings that accompany this component.
package_config:
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  # Config modifications recorded for the package, one expression per entry.
  config_mods:
  - |
    .requirements.commands := ['ps']
    .runners[.type == 'nextflow'].directives.tag := '$id'
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,197 @@
# Component identity and command-line interface of rseqc_inferexperiment.
name: "rseqc_inferexperiment"
namespace: "rseqc"
version: "main"
argument_groups:
# Inputs: alignment file and gene model (both required) plus sampling and
# mapping-quality settings.
- name: "Input"
  arguments:
  - type: "file"
    name: "--input"
    description: "input alignment file in BAM or SAM format"
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--refgene"
    description: "Reference gene model in bed format"
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Positive integer (min: 1), 200000 when not given.
  - type: "integer"
    name: "--sample_size"
    description: "Number of reads sampled from SAM/BAM file, default = 200000."
    info: null
    default:
    - 200000
    required: false
    min: 1
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Non-negative integer (min: 0), 30 when not given.
  - type: "integer"
    name: "--map_qual"
    description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
      \ reads, default=30."
    info: null
    default:
    - 30
    required: false
    min: 0
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Output: a single text report whose default name is derived from "$id".
- name: "Output"
  arguments:
  - type: "file"
    name: "--output"
    description: "output file (txt) of strandedness report"
    info: null
    default:
    - "$id.strandedness.txt"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Infer strandedness from sequencing reads\n"
# Test script and the data files it ships with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test.paired_end.sorted.bam"
- type: "file"
  path: "test.bed12"
# Pointer to the nf-core module file and commit recorded as migration info.
info:
  migration_info:
    git_repo: "https://github.com/nf-core/rnaseq.git"
    paths:
    - "modules/nf-core/rseqc/inferexperiment/main.nf"
    last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
# Commands that must be available where the component runs.
requirements:
  commands:
  - "ps"
runners:
# Runner 1: standalone executable.
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
# Runner 2: Nextflow module.
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> directive string.
    labels:
      # Memory in decimal units (1 GB = 1000^3 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (1 GiB = 1024^3 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: Ubuntu base image with pip and the RSeQC Python package.
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "python3-pip"
    interactive: false
  - type: "python"
    user: false
    packages:
    - "RSeQC"
    upgrade: true
  entrypoint: []
  cmd: null
# Native engine: run directly on the host without a container.
- type: "native"
  id: "native"
# Build record: source config, runner/engine used, output locations and the
# git commit/remote of the build.
build_info:
  config: "src/rseqc/rseqc_inferexperiment/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/rseqc/rseqc_inferexperiment"
  executable: "target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment"
  viash_version: "0.9.0"
  git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
  git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings that accompany this component.
package_config:
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  # Config modifications recorded for the package, one expression per entry.
  config_mods:
  - |
    .requirements.commands := ['ps']
    .runners[.type == 'nextflow'].directives.tag := '$id'
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,283 @@
# Component identity and command-line interface of rseqc_innerdistance.
name: "rseqc_innerdistance"
namespace: "rseqc"
version: "main"
argument_groups:
# Inputs: alignment file and gene model (both required), sampling and
# mapping-quality settings, and the histogram range/step.
- name: "Input"
  arguments:
  - type: "file"
    name: "--input"
    description: "input alignment file in BAM or SAM format"
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--refgene"
    description: "Reference gene model in bed format"
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Positive integer (min: 1), 200000 when not given.
  - type: "integer"
    name: "--sample_size"
    description: "Number of reads sampled from SAM/BAM file, default = 200000."
    info: null
    default:
    - 200000
    required: false
    min: 1
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Non-negative integer (min: 0), 30 when not given.
  - type: "integer"
    name: "--map_qual"
    description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
      \ reads, default=30."
    info: null
    default:
    - 30
    required: false
    min: 0
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # The bound and step arguments declare no `min`; the lower bound defaults
  # to a negative value.
  - type: "integer"
    name: "--lower_bound_size"
    description: "Lower bound of inner distance (bp). This option is used for plotting\
      \ histogram, default=-250."
    info: null
    default:
    - -250
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--upper_bound_size"
    description: "Upper bound of inner distance (bp). This option is used for plotting\
      \ histogram, default=250."
    info: null
    default:
    - 250
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--step_size"
    description: "Step size (bp) of histogram. This option is used for plotting histogram,\
      \ default=5."
    info: null
    default:
    - 5
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Outputs: every file below has a default name derived from "$id" and is
# not checked for existence after the run (must_exist: false).
- name: "Output"
  arguments:
  - type: "file"
    name: "--output_stats"
    description: "output file (txt) with summary statistics of inner distances of\
      \ paired reads"
    info: null
    default:
    - "$id.inner_distance.stats"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_dist"
    description: "output file (txt) with inner distances of all paired reads"
    info: null
    default:
    - "$id.inner_distance.txt"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_freq"
    description: "output file (txt) with frequencies of inner distances of all paired\
      \ reads"
    info: null
    default:
    - "$id.inner_distance_freq.txt"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_plot"
    description: "output file (pdf) with histogram plot of inner distances of all\
      \ paired reads"
    info: null
    default:
    - "$id.inner_distance_plot.pdf"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_plot_r"
    description: "output file (R) with script of histogram plot of inner distances\
      \ of all paired reads"
    info: null
    default:
    - "$id.inner_distance_plot.r"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Calculate inner distance between read pairs. \n"
# Test script and the data files it ships with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test.paired_end.sorted.bam"
- type: "file"
  path: "test.bed12"
# Pointer to the nf-core module file and commit recorded as migration info.
info:
  migration_info:
    git_repo: "https://github.com/nf-core/rnaseq.git"
    paths:
    - "modules/nf-core/rseqc/innerdistance/main.nf"
    last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
# Commands that must be available where the component runs.
requirements:
  commands:
  - "ps"
runners:
# Runner 1: standalone executable.
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
# Runner 2: Nextflow module.
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> directive string.
    labels:
      # Memory in decimal units (1 GB = 1000^3 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (1 GiB = 1024^3 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: Ubuntu base image with pip, R and the RSeQC Python package.
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "python3-pip"
    - "r-base"
    interactive: false
  - type: "python"
    user: false
    packages:
    - "RSeQC"
    upgrade: true
  entrypoint: []
  cmd: null
# Native engine: run directly on the host without a container.
- type: "native"
  id: "native"
# Build record: source config, runner/engine used, output locations and the
# git commit/remote of the build.
build_info:
  config: "src/rseqc/rseqc_innerdistance/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/rseqc/rseqc_innerdistance"
  executable: "target/executable/rseqc/rseqc_innerdistance/rseqc_innerdistance"
  viash_version: "0.9.0"
  git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
  git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings that accompany this component.
package_config:
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  # Config modifications recorded for the package, one expression per entry.
  config_mods:
  - |
    .requirements.commands := ['ps']
    .runners[.type == 'nextflow'].directives.tag := '$id'
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,271 @@
name: "rseqc_junctionannotation"
namespace: "rseqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "input alignment file in BAM or SAM format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refgene"
description: "Reference gene model in bed format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--map_qual"
description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
\ reads, default=30."
info: null
default:
- 30
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_intron"
description: "Minimum intron length (bp), default = 50."
info: null
default:
- 50
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_log"
description: "output log of junction annotation script"
info: null
default:
- "$id.junction_annotation.log"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_plot_r"
description: "r script to generate splice_junction and splice_events plot"
info: null
default:
- "$id.junction_annotation_plot.r"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_junction_bed"
description: "junction annotation file (bed format)"
info: null
default:
- "$id.junction_annotation.bed"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_junction_interact"
description: "interact file (bed format) of junctions. Can be uploaded to UCSC\
\ genome browser or converted to bigInteract (using bedToBigBed program) for\
\ visualization."
info: null
default:
- "$id.junction_annotation.Interact.bed"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_junction_sheet"
description: "junction annotation file (xls format)"
info: null
default:
- "$id.junction_annotation.xls"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_splice_events_plot"
description: "plot of splice events (pdf)"
info: null
default:
- "$id.splice_events.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_splice_junctions_plot"
description: "plot of junctions (pdf)"
info: null
default:
- "$id.splice_junctions_plot.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Compare detected splice junctions to reference gene model.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
- type: "file"
path: "test.bed12"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/junctionannotation/main.nf"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
- "r-base"
interactive: false
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rseqc/rseqc_junctionannotation/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rseqc/rseqc_junctionannotation"
executable: "target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,260 @@
name: "rseqc_junctionsaturation"
namespace: "rseqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "input alignment file in BAM or SAM format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refgene"
description: "Reference gene model in bed format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--sampling_percentile_lower_bound"
description: "Sampling starts from this percentile, must be an integer between\
\ 0 and 100, default = 5."
info: null
default:
- 5
required: false
min: 0
max: 100
direction: "input"
multiple: false
multiple_sep: ";"
# Upper end of the resampling range. The help text states the same default
# (100) that is declared in `default` below.
- type: "integer"
name: "--sampling_percentile_upper_bound"
description: "Sampling ends at this percentile, must be an integer between 0 and\
\ 100, default = 100."
info: null
default:
- 100
required: false
min: 0
max: 100
direction: "input"
multiple: false
multiple_sep: ";"
# Increment (in percent) between successive resampling points. A step of 0
# can never advance the sampling percentile, so the smallest accepted value
# is 1.
- type: "integer"
name: "--sampling_percentile_step"
description: "Sampling frequency in %. Smaller value means more sampling times.\
\ Must be an integer between 1 and 100, default = 5."
info: null
default:
- 5
required: false
min: 1
max: 100
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_intron"
description: "Minimum intron length (bp), default = 50."
info: null
default:
- 50
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_splice_read"
description: "Minimum number of supporting reads to call a junction, default =\
\ 1."
info: null
default:
- 1
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--map_qual"
description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
\ reads, default=30."
info: null
default:
- 30
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_plot_r"
description: "r script to generate junction_saturation_plot plot"
info: null
default:
- "$id.junction_saturation_plot.r"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_plot"
description: "plot of junction saturation (pdf)"
info: null
default:
- "$id.junction_saturation_plot.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
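description: "Check junction saturation: resample the alignments at increasing\
\ percentages and compare the splice junctions detected in each subset to the\
\ reference gene model.\n"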
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
- type: "file"
path: "test.bed"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/junctionsaturation/main.nf"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
- "r-base"
interactive: false
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rseqc/rseqc_junctionsaturation/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rseqc/rseqc_junctionsaturation"
executable: "target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,173 @@
name: "rseqc_readdistribution"
namespace: "rseqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "input alignment file in BAM or SAM format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refgene"
description: "Reference gene model in bed format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
description: "output file (txt) of read distribution analysis."
info: null
default:
- "$id.read_distribution.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Calculate how mapped reads are distributed over genomic features.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
- type: "file"
path: "test.bed12"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/readdistribution/main.nf"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
interactive: false
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rseqc/rseqc_readdistribution/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rseqc/rseqc_readdistribution"
executable: "target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,222 @@
name: "rseqc_readduplication"
namespace: "rseqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "input alignment file in BAM or SAM format"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_count_upper_limit"
description: "Upper limit of reads' occurrence. Only used for plotting, default\
\ = 500 (times)."
info: null
default:
- 500
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--map_qual"
description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
\ reads, default=30."
info: null
default:
- 30
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_duplication_rate_plot_r"
description: "R script for generating duplication rate plot"
info: null
default:
- "$id.duplication_rate_plot.r"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duplication_rate_plot"
description: "duplication rate plot (pdf)"
info: null
default:
- "$id.duplication_rate_plot.pdf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duplication_rate_mapping"
description: "Summary of mapping-based read duplication"
info: null
default:
- "$id.duplication_rate_mapping.xls"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_duplication_rate_sequence"
description: "Summary of sequencing-based read duplication"
info: null
default:
- "$id.duplication_rate_sequencing.xls"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Calculate read duplication rate.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/readduplication/main.nf"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
- "r-base"
interactive: false
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rseqc/rseqc_readduplication/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rseqc/rseqc_readduplication"
executable: "target/executable/rseqc/rseqc_readduplication/rseqc_readduplication"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,225 @@
name: "rseqc_tin"
namespace: "rseqc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--bam_input"
description: "Path to input alignment file in BAM or SAM format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai_input"
description: "Path to bam index file in bai format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refgene"
description: "BED file containing the reference gene model"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--minimum_coverage"
description: "Minimum number of reads mapped to a transcript, default = 10."
info: null
default:
- 10
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--sample_size"
description: "Number of equal-spaced nucleotide positions picked from mRNA. Note,\
\ if this number is larger than the length of mRNA (L), it will be halved until\
\ it's smaller than L (default = 100)"
info: null
default:
- 100
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--subtract_background"
description: "Set flag to subtract background noise (estimated from intronic reads).\
\ Only use this option if there are substantial intronic reads."
info: null
direction: "input"
- name: "Output"
arguments:
- type: "file"
name: "--output_tin_summary"
description: "summary statistics (txt) of calculated TIN metrics"
info: null
default:
- "$id.tin_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_tin"
description: "file with TIN metrics (xls)"
info: null
default:
- "$id.tin.xls"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Calculate TIN (transcript integrity number) from RNA-seq reads\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
- type: "file"
path: "test.paired_end.sorted.bam.bai"
- type: "file"
path: "test.bed12"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/tin/main.nf"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
interactive: false
# Install RSeQC through the declarative python setup, matching the other
# rseqc_* components in this package, instead of a raw `pip3 install` RUN step.
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rseqc/tin/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rseqc/rseqc_tin"
executable: "target/executable/rseqc/rseqc_tin/rseqc_tin"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,209 @@
name: "sortmerna"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean"
name: "--paired"
description: "Are the reads single-end or paired-end"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "Input fastq"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--ribo_database_manifest"
description: "Text file containing paths to fasta files (one per line) that will\
\ be used to create the database for SortMeRNA."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--sortmerna_log"
description: "Sortmerna log file."
info: null
default:
- "$id.sortmerna.log"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_1"
description: "Output file for read 1."
info: null
default:
- "$id.$key.read_1.fastq"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Output file for read 2."
info: null
default:
- "$id.$key.read_2.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Local sequence alignment tool for filtering, mapping and clustering.\
\ The main application of SortMeRNA is filtering rRNA from metatranscriptomic data.\
\ SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and\
\ one or multiple rRNA database file(s), and sorts apart aligned and rejected reads\
\ into two files.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
- type: "file"
path: "rRNA"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/sortmerna/main.nf"
- "modules/nf-core/sortmerna/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
# Install the SortMeRNA 4.3.6 release installer. ca-certificates is installed
# alongside wget so the download is verified over TLS; the installer is run
# with bash, so it must not be fetched with certificate checking disabled.
- type: "docker"
run:
- "apt-get update && \\\napt-get install -y --no-install-recommends ca-certificates\
\ cmake g++ wget && \\\napt-get clean && \\\nwget https://github.com/sortmerna/sortmerna/releases/download/v4.3.6/sortmerna-4.3.6-Linux.sh\
\ && \\\nbash sortmerna-4.3.6-Linux.sh --skip-license\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/sortmerna/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/sortmerna"
executable: "target/executable/sortmerna/sortmerna"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,227 @@
# Built viash component config for "stringtie" (see `description` further
# down: transcript assembly and quantification for RNA-Seq).
name: "stringtie"
version: "main"
# Arguments are split into an "Input" and an "Output" group. Every argument
# below is optional (required: false) and single-valued (multiple: false).
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--strandedness"
description: "Forward or reverse strand?"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# NOTE(review): --bam and --annotation_gtf carry no description; consider
# adding one in the source config.
- type: "file"
name: "--bam"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--annotation_gtf"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_stringtie_args"
description: "Extra arguments for running StringTie"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--stringtie_ignore_gtf"
description: "Perform reference-guided de novo assembly of transcripts using StringTie,\
\ i.e. don't restrict to those in GTF file."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Output files. The defaults are templates containing $id and $key;
# presumably these are substituted by the runner at execution time (TODO confirm).
- name: "Output"
arguments:
- type: "file"
name: "--transcript_gtf"
info: null
default:
- "$id.$key.transcripts.gtf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--coverage_gtf"
info: null
default:
- "$id.$key.coverage.gtf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--abundance"
info: null
default:
- "$id.$key.abundance.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--ballgown"
description: "for running ballgown"
info: null
default:
- "$id.$key.ballgown"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# The component's own script (bash).
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Transcript assembly and quantification for RNA-Seq\n"
# Test script plus the two data files listed next to it.
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "genes.gtf"
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
# Provenance recorded under migration_info: the nf-core/rnaseq repository,
# the module paths within it, and the commit (last_sha) they refer to.
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/stringtie/stringtie/main.nf"
- "modules/nf-core/stringtie/stringtie/meta.yml"
last_sha: "55398de6ab7577acfe9b1180016a93d7af7eb859"
status: "enabled"
requirements:
commands:
- "ps"
# Two runners are defined: a standalone executable and a Nextflow module.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Resource labels: memory in bytes for decimal sizes (gb/tb, powers of 1000)
# and binary sizes (gib/tib, powers of 1024), followed by CPU counts.
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines: a Docker image based on ubuntu:22.04, and the native host.
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
# Installs the prebuilt StringTie 2.2.1 Linux binary into /usr/local/bin.
# ca-certificates is installed explicitly so that wget can verify the TLS
# certificate of the download host instead of skipping verification; the
# downloaded archive, the unpacked directory and the apt lists are removed
# afterwards to keep the image layer small.
- type: "docker"
run:
- "apt-get update && \\\napt-get install -y build-essential zlib1g wget ca-certificates\
\ && \\\nwget https://github.com/gpertea/stringtie/releases/download/v2.2.1/stringtie-2.2.1.Linux_x86_64.tar.gz\
\ && \\\ntar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \\\ncp stringtie-2.2.1.Linux_x86_64/stringtie\
\ /usr/local/bin/ && \\\nrm -rf stringtie-2.2.1.Linux_x86_64 stringtie-2.2.1.Linux_x86_64.tar.gz\
\ /var/lib/apt/lists/*\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance for this artifact: the source config it was built from,
# the runner/engine combination, the output location, the viash version and
# the git commit/remote of the source repository.
build_info:
config: "src/stringtie/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stringtie"
executable: "target/executable/stringtie/stringtie"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings recorded alongside the component.
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
# The values these modifications set (the `ps` command requirement, the
# nextflow `tag` directive, the extra native engine and the docker target
# registry/tag) all appear in this component's config.
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,210 @@
# Built viash component config for "summarizedexperiment" (see `description`
# further down: create a SummarizedExperiment object from Salmon counts).
name: "summarizedexperiment"
version: "main"
# Arguments are split into an "Input" and an "Output" group. Every argument
# below is an optional (required: false), single-valued file.
# NOTE(review): none of the arguments carries a description; consider adding
# them in the source config.
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--tpm_gene"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_length_scaled"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_scaled"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tpm_transcript"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_transcript"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tx2gene_tsv"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Single output path, defaulting to "merged_summarizedexperiment".
- name: "Output"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "merged_summarizedexperiment"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# The component's bash script plus the R script shipped alongside it.
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "summarizedexperiment.r"
description: "Create SummarizedExperiment object from Salmon counts"
# Provenance recorded under migration_info: the nf-core/rnaseq repository,
# the module path within it, and the commit (last_sha) it refers to.
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/summarizedexperiment/main.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
# Two runners are defined: a standalone executable and a Nextflow module.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Resource labels: memory in bytes for decimal sizes (gb/tb, powers of 1000)
# and binary sizes (gib/tib, powers of 1024), followed by CPU counts.
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines: a Docker image based on ubuntu:22.04, and the native host.
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
# Image setup: apt packages first, then the Bioconductor packages listed
# under `bioc`.
setup:
- type: "apt"
packages:
- "r-base"
- "libcurl4-openssl-dev"
interactive: false
- type: "r"
bioc:
- "SummarizedExperiment"
- "tximeta"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance for this artifact: the source config it was built from,
# the runner/engine combination, the output location, the viash version and
# the git commit/remote of the source repository.
build_info:
config: "src/summarizedexperiment/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/summarizedexperiment"
executable: "target/executable/summarizedexperiment/summarizedexperiment"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
# Package-level settings recorded alongside the component.
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
# The values these modifications set (the `ps` command requirement, the
# nextflow `tag` directive, the extra native engine and the docker target
# registry/tag) all appear in this component's config.
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env Rscript

# Create a SummarizedExperiment (se) object from Salmon count and TPM tables
# and save it as "<counts file without extension>.rds".
#
# Usage: summarizedexperiment.r <coldata> <counts> <tpm> [tx2gene]
#   coldata  path to a tab-separated sample table; when the path does not
#            exist, a minimal table is built from the counts column names
#   counts   tab-separated table; column 1 becomes the row names and the
#            following (gene_name) column is dropped
#   tpm      tab-separated table with the same layout as <counts>
#   tx2gene  headerless tab-separated table read as tx, gene_id, gene_name;
#            may be omitted, missing on disk, or empty

library(SummarizedExperiment)

args <- commandArgs(trailingOnly = TRUE)
# <counts> and <tpm> are read unconditionally, so three arguments are the
# minimum; only tx2gene is optional.
if (length(args) < 3) {
    stop("Usage: summarizedexperiment.r <coldata> <counts> <tpm> [tx2gene]", call. = FALSE)
}

coldata <- args[1]
counts_fn <- args[2]
tpm_fn <- args[3]
tx2gene <- if (length(args) >= 4) args[4] else NA_character_

# Start from an empty annotation table so that `rowdata` is always defined,
# even when no usable tx2gene file is supplied.
rowdata <- data.frame(
    tx = character(0),
    gene_id = character(0),
    gene_name = character(0),
    stringsAsFactors = FALSE
)

# file.info() reports an NA size for a path that does not exist, so NA has to
# be handled before comparing against 0.
tx2gene_size <- if (is.na(tx2gene)) NA else file.info(tx2gene)$size
if (is.na(tx2gene_size) || tx2gene_size == 0) {
    message("tx2gene not available or empty, annotating with feature IDs only: ", tx2gene)
} else {
    rowdata <- read.csv(tx2gene, sep = "\t", header = FALSE)
    colnames(rowdata) <- c("tx", "gene_id", "gene_name")
}

counts <- read.csv(counts_fn, row.names = 1, sep = "\t")
counts <- counts[, -1, drop = FALSE] # remove gene_name column
tpm <- read.csv(tpm_fn, row.names = 1, sep = "\t")
tpm <- tpm[, -1, drop = FALSE] # remove gene_name column

# Decide whether the tables are indexed by transcript or by gene: whichever
# identifier column overlaps more with the row names of the counts table wins.
n_tx_hits <- length(intersect(rownames(counts), rowdata[["tx"]]))
n_gene_hits <- length(intersect(rownames(counts), rowdata[["gene_id"]]))
if (n_tx_hits > n_gene_hits) {
    by_what <- "tx"
} else {
    by_what <- "gene_id"
    # Gene-level tables need one row per gene, so drop the transcript column.
    rowdata <- unique(rowdata[, 2:3])
}

if (file.exists(coldata)) {
    coldata <- read.csv(coldata, sep = "\t")
    # Reorder the sample table to follow the column order of the counts table.
    coldata <- coldata[match(colnames(counts), coldata[, 1]), ]
    # Same `files` value as the fallback branch below uses.
    coldata <- cbind(files = colnames(counts), coldata)
} else {
    message("ColData not avaliable ", coldata)
    coldata <- data.frame(files = colnames(counts), names = colnames(counts))
}
rownames(coldata) <- coldata[["names"]]

# Features present in the counts table but absent from the annotation get a
# placeholder row in which every annotation column holds the feature ID itself.
extra <- setdiff(rownames(counts), as.character(rowdata[[by_what]]))
if (length(extra) > 0) {
    rowdata <- rbind(
        rowdata,
        data.frame(
            tx = extra,
            gene_id = extra,
            gene_name = extra
        )[, colnames(rowdata)]
    )
}

# Align the annotation rows with the rows of the counts table.
rowdata <- rowdata[match(rownames(counts), as.character(rowdata[[by_what]])), ]
rownames(rowdata) <- rowdata[[by_what]]

se <- SummarizedExperiment(
    assays = list(counts = counts, abundance = tpm),
    colData = DataFrame(coldata),
    rowData = rowdata
)

saveRDS(se, file = paste0(tools::file_path_sans_ext(counts_fn), ".rds"))

View File

@@ -0,0 +1,799 @@
name: "trimgalore"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "Input files. Note that paired-end files need to be supplied in a\
\ pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz"
info: null
example:
- "sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Trimming options"
arguments:
- type: "integer"
name: "--quality"
alternatives:
- "-q"
description: "Trim low-quality ends (below the specified Phred score) from reads\
\ in addition to adapter removal. For RRBS samples, quality trimming will be\
\ performed first, and adapter trimming is carried in a second round. Other\
\ files are quality and adapter trimmed in a single pass. The algorithm is the\
\ same as the one used by BWA (Subtract INT from all qualities; compute partial\
\ sums from all indices to the end of the sequence; cut sequence at the index\
\ at which the sum is minimal)."
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--phred33"
description: "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores\
\ (Sanger/Illumina 1.9+ encoding) for quality trimming."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--phred64"
description: "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores\
\ (Illumina 1.5 encoding) for quality trimming."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--fastqc"
description: "Run FastQC in the default mode on the FastQ file once trimming is\
\ complete."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--fastqc_args"
description: "Passes extra arguments to FastQC. If more than one argument is to\
\ be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra\
\ arguments will automatically invoke FastQC, so --fastqc does not have to be\
\ specified separately."
info: null
example:
- "--nogroup --outdir /home/"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--adapter"
alternatives:
- "-a"
description: "Adapter sequence to be trimmed. If not specified explicitly, Trim\
\ Galore will try to auto-detect whether the Illumina universal, Nextera transposase\
\ or Illumina small RNA adapter sequence was used. A single base may also be\
\ given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request,\
\ multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT\
\ -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor\
\ so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\
\nPotentially in conjunction with the parameter \"-n 3\" to trim all adapters.\
\ \n example: 20\n"
info: null
example:
- "AGCTCCCG"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--adapter2"
alternatives:
- "-a2"
description: "Optional adapter sequence to be trimmed off read 2 of paired-end\
\ files. This option requires '--paired' to be specified as well. If the libraries\
\ to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5'\
\ adapter automatically (GATCGTCGGACT). A single base may also be given as e.g.\
\ -a2 A{10}, to be expanded to -a2 AAAAAAAAAA."
info: null
example:
- "AGCTCCCG"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--illumina"
description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\
\ universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of\
\ adapter sequence."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--stranded_illumina"
description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\
\ stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default\
\ auto-detection of adapter sequence."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--nextera"
description: "Adapter sequence to be trimmed is the first 12bp of the Nextera\
\ adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--small_rna"
description: "Adapter sequence to be trimmed is the first 12bp of the Illumina\
\ Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection\
\ of adapter sequence. Selecting to trim smallRNA adapters will also lower the\
\ --length value to 18bp. If the smallRNA libraries are paired-end then -a2 will\
\ be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT) unless\
\ -a2 had been defined explicitly."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--consider_already_trimmed"
description: "During adapter auto-detection, the limit set by this argument allows\
\ the user to set a threshold up to which the file is considered already adapter-trimmed.\
\ If no adapter sequence exceeds this threshold, no additional adapter trimming\
\ will be performed (technically, the adapter is set to '-a X'). Quality trimming\
\ is still performed as usual."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_length"
description: "Discard reads that are longer than the specified value after trimming.\
\ This is only advised for smallRNA sequencing to remove non-small RNA sequences."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--stringency"
description: "Overlap with adapter sequence required to trim a sequence. Defaults\
\ to a very stringent setting of 1, i.e. even a single bp of overlapping sequence\
\ will be trimmed off from the 3' end of any read."
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--error_rate"
alternatives:
- "-e"
description: "Maximum allowed error rate (no. of errors divided by the length\
\ of the matching region)"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--gzip"
description: "Compress the output file with GZIP. If the input files are GZIP-compressed\
\ the output files will automatically be GZIP compressed as well. As of v0.2.8\
\ the compression will take place on the fly."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--dont_gzip"
description: "Output files won't be compressed with GZIP. This option overrides\
\ --gzip."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--length"
description: "Discard reads that became shorter than the specified length because\
\ of either quality or adapter trimming. A value of '0' effectively disables\
\ this behaviour. For paired-end files, both reads of a read-pair need to be\
\ longer than the specified length to be printed out to validated paired-end\
\ files. If only one read became too short there is the possibility of keeping\
\ such unpaired single-end reads using the --retain_unpaired option."
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_n"
description: "The total number of Ns a read may contain before it will be removed\
\ altogether. In a paired-end setting, either read exceeding this limit will\
\ result in the entire pair being removed from the trimmed output files. If\
\ COUNT is a number between 0 and 1, it is interpreted as a fraction of the\
\ read length."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--trim_n"
description: "Removes Ns from either side of the read. This option does currently\
\ not work in RRBS mode."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--no_report_file"
description: "If specified no report file will be generated."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--suppress_warn"
description: "If specified any output to STDOUT or STDERR will be suppressed."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--clip_R1"
description: "Instructs TrimGalore to remove given number of bp from the 5' end\
\ of read 1 (or single-end reads). This may be useful if the qualities were\
\ very poor, or if there is some sort of unwanted bias at the 5' end."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--clip_R2"
description: "Instructs TrimGalore to remove given number bp from the 5' end of\
\ read 2 (paired-end reads only). This may be useful if the qualities were very\
\ poor, or if there is some sort of unwanted bias at the 5' end. For paired-end\
\ BS-Seq, it is recommended to remove the first few bp because the end-repair\
\ reaction may introduce a bias towards low methylation."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--three_prime_clip_R1"
description: "Instructs Trim Galore to remove specified number of bp from the\
\ 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has\
\ been performed. This may remove some bias from the 3' end that is not directly\
\ related to adapter sequence or basecall quality."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--three_prime_clip_R2"
description: "Instructs Trim Galore to remove <int> bp from the 3' end of read\
\ 2 AFTER adapter/quality trimming has been performed. This may remove some\
\ unwanted bias from the 3' end that is not directly related to adapter sequence\
\ or basecall quality."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--nextseq"
description: "This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt,\
\ which will set a quality cutoff (that is normally given with -q instead),\
\ but qualities of G bases are ignored. This trimming is in common for the NextSeq-\
\ and NovaSeq-platforms, where basecalls without any signal are called as high-quality\
\ G bases. This is mutually exclusive with '-q INT'."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--basename"
description: "Use specified name (PREFERRED_NAME) as the basename for output files,\
\ instead of deriving the filenames from the input files. Single-end data would\
\ be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz)\
\ and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works\
\ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\
\ longer lists."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--cores"
alternatives:
- "-j"
description: "Number of cores to be used for trimming"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Specific trimming options without adapter/quality trimming"
arguments:
- type: "integer"
name: "--hardtrim5"
description: "Instead of performing adapter-/quality trimming, this option will\
\ simply hard-trim sequences to <int> bp at the 5'-end. Once hard-trimming of\
\ files is complete, Trim Galore will exit. Hard-trimmed output files will end\
\ in .<int>_5prime.fq(.gz)."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--hardtrim3"
description: "Instead of performing adapter-/quality trimming, this option will\
\ simply hard-trim sequences to <int> bp at the 3'-end. Once hard-trimming of\
\ files is complete, Trim Galore will exit. Hard-trimmed output files will end\
\ in .<int>_3prime.fq(.gz)."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--clock"
description: "In this mode, reads are trimmed in a specific way that is currently\
\ used for the Mouse Epigenetic Clock."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--polyA"
description: "This is a new, still experimental, trimming mode to identify and\
\ remove poly-A tails from sequences. When --polyA is selected, Trim Galore\
\ attempts to identify from the first supplied sample whether sequences contain\
\ more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines\
\ if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA\
\ or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary\
\ base from the start of the reads. The auto-detection uses a default of A{20}\
\ for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These\
\ values may be changed manually using the options -a and -a2. In addition to\
\ trimming the sequences, white spaces are replaced with _ and it records in\
\ the read ID how many bases were trimmed so it can later be used to identify\
\ PolyA trimmed sequences. This is currently done by writing tags to both the\
\ start (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming\
\ mode expects that sequences were both adapter and quality trimmed before looking\
\ for Poly-A tails, and it is the user's responsibility to carry out an initial\
\ round of trimming."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--implicon"
description: "This is a special mode of operation for paired-end data, such as\
\ required for the IMPLICON method, where a UMI sequence is getting transferred\
\ from the start of Read 2 to the readID of both reads. Following this, Trim\
\ Galore will exit. In its current implementation, the UMI carrying reads come\
\ in the following format\n Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\
\ 3'\n Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'\nWhere UUUUUUUU is\
\ a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual\
\ fragment to be sequenced. The UMI of Read 2 (R2) is written into the read\
\ ID of both reads and removed from the actual sequence.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "RRBS-specific options"
arguments:
- type: "boolean"
name: "--rrbs"
description: "Specifies that the input file was an MspI digested RRBS sample (recognition\
\ site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed\
\ will have a further 2 bp removed from their 3' end. Sequences which were merely\
\ trimmed because of poor quality will not be shortened further. Read 2 of paired-end\
\ libraries will in addition have the first 2 bp removed from the 5' end (by\
\ setting '--clip_r2 2'). This is to avoid using artificial methylation calls\
\ from the filled-in cytosine positions close to the 3' MspI site in sequenced\
\ fragments. This option is not recommended for users of the Tecan Ovation RRBS\
\ Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--non_directional"
description: "Selecting this option for non-directional RRBS libraries will screen\
\ quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and,\
\ if found, removes the first two basepairs. Like with the option '--rrbs' this\
\ avoids using cytosine positions that were filled-in during the end-repair\
\ step. '--non_directional' requires '--rrbs' to be specified as well. Note\
\ that this option does not set '--clip_r2 2' in paired-end mode."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--keep"
description: "Keep the quality trimmed intermediate file."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Paired-end specific options"
arguments:
- type: "boolean"
name: "--paired"
description: "This option performs length trimming of quality/adapter/RRBS trimmed\
\ reads for paired-end files. To pass the validation test, both sequences of\
\ a sequence pair are required to have a certain minimum length which is governed\
\ by the option --length (see above). If only one read passes this length threshold\
\ the other read can be rescued (see option --retain_unpaired). Using this option\
\ lets you discard too short read pairs without disturbing the sequence-by-sequence\
\ order of FastQ files which is required by many aligners. Trim Galore expects\
\ paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq\
\ SRR2_1.fq.gz SRR2_2.fq.gz ... ."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--retain_unpaired"
description: "If only one of the two paired-end reads became too short, the longer\
\ read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output\
\ files. The length cutoff for unpaired single-end reads is governed by the\
\ parameters -r1/--length_1 and -r2/--length_2."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--length_1"
alternatives:
- "-r1"
description: "Unpaired single-end read length cutoff needed for read 1 to be written\
\ to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode."
info: null
example:
- 35
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--length_2"
alternatives:
- "-r2"
description: "Unpaired single-end read length cutoff needed for read 2 to be written\
\ to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode."
info: null
example:
- 35
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_dir"
alternatives:
- "-o"
description: "If specified all output will be written to this directory instead\
\ of the current directory."
info: null
default:
- "trimmed_output"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_r1"
description: "Output file for read 1. Only works when 1 file (single-end) or 2\
\ files (paired-end) are specified, but not for longer lists."
info: null
example:
- "read_1.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_r2"
description: "Output file for read 2. Only works when 1 file (single-end) or 2\
\ files (paired-end) are specified, but not for longer lists."
info: null
example:
- "read_2.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimming_report_r1"
description: "Trimming report for read 1. Only works when 1 file (single-end)\
\ or 2 files (paired-end) are specified, but not for longer lists."
info: null
example:
- "read_1.trimming_report.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimming_report_r2"
description: "Trimming report for read 2. Only works when 1 file (single-end)\
\ or 2 files (paired-end) are specified, but not for longer lists."
info: null
example:
- "read_2.trimming_report.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_fastqc_html_1"
description: "FastQC report for trimmed (single-end) reads (or read 1 for paired-end).\
\ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\
\ but not for longer lists."
info: null
example:
- "read_1.fastqc.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_fastqc_html_2"
description: "FastQC report for trimmed reads (read2 for paired-end). Only works\
\ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\
\ longer lists."
info: null
example:
- "read_2.fastqc.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_fastqc_zip_1"
description: "FastQC results for trimmed (single-end) reads (or read 1 for paired-end).\
\ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\
\ but not for longer lists."
info: null
example:
- "read_1.fastqc.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trimmed_fastqc_zip_2"
description: "FastQC results for trimmed reads (read2 for paired-end). Only works\
\ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\
\ longer lists."
info: null
example:
- "read_2.fastqc.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--unpaired_r1"
description: "Output file for unpaired read 1. Only works when 1 file (single-end)\
\ or 2 files (paired-end) are specified, but not for longer lists."
info: null
example:
- "unpaired_read_1.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--unpaired_r2"
description: "Output file for unpaired read 2. Only works when 1 file (single-end)\
\ or 2 files (paired-end) are specified, but not for longer lists."
info: null
example:
- "unpaired_read_2.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "A wrapper tool around Cutadapt and FastQC to consistently apply quality\
\ and adapter trimming to FastQ files. \n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "trimming"
- "adapters"
license: "GPL-3.0"
links:
repository: "https://github.com/FelixKrueger/TrimGalore"
homepage: "https://github.com/FelixKrueger/TrimGalore"
documentation: "https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo \"TrimGalore: `trim_galore --version | sed -n 's/.*version\\s\\+\\([0-9]\\\
+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p'`\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/trimgalore/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/trimgalore"
executable: "target/executable/trimgalore/trimgalore"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,203 @@
name: "tx2gene"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--quant_results"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--gtf"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_extra_attributes"
info: null
default:
- "gene_name"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_group_features"
info: null
default:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quant_type"
description: "Method used for quantification"
info: null
required: false
choices:
- "salmon"
- "kallisto"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--tsv"
info: null
default:
- "tx2gene.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--updated_versions"
info: null
default:
- "versions.yml"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "tx2gene.py"
description: "Get transcript id (tx) to gene names for tximport"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/tx2gene/main.nf"
last_sha: "839ac5cab892504514cc96d44e99e70516b239d2"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
- "unzip"
interactive: false
- type: "python"
user: false
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/tx2gene/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/tx2gene"
executable: "target/executable/tx2gene/tx2gene"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

1254
target/executable/tx2gene/tx2gene Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
#!/usr/bin/env python
# Written by Lorena Pantano with subsequent reworking by Jonathan Manning. Released under the MIT license.
import logging
import argparse
import glob
import os
import re
from collections import Counter, defaultdict, OrderedDict
from collections.abc import Set
from typing import Dict
# Configure logging: every record is formatted as
# "<logger name> - <timestamp> <level>: <message>", and this module's logger
# emits records at INFO level and above.
logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def read_top_transcripts(quant_dir: str, file_pattern: str) -> Set[str]:
    """
    Collect transcript IDs from the first 100 data rows of a quantification file.

    The first path returned by globbing ``file_pattern`` inside ``quant_dir`` is
    used. Its first line is treated as a header and skipped; the first
    whitespace-delimited field of each of the next 100 lines is taken as a
    transcript ID. Blank lines inside that window are ignored.

    Parameters:
    quant_dir (str): Directory where quantification files are located.
    file_pattern (str): Glob pattern to match quantification files.

    Returns:
    set: Transcript IDs found in the first 100 data rows.

    Raises:
    FileNotFoundError: If no file in ``quant_dir`` matches ``file_pattern``.
    """
    # Check for the file explicitly instead of catching IndexError around the
    # whole body: a blank line in the file must not be reported as a missing file.
    matches = glob.glob(os.path.join(quant_dir, file_pattern))
    if not matches:
        logger.error("No quantification files found.")
        raise FileNotFoundError("Quantification file not found.")

    transcripts = set()
    with open(matches[0], "r") as file_handle:
        for i, line in enumerate(file_handle):
            if i == 0:
                # Header row
                continue
            if i > 100:
                # Only the first 100 data rows are sampled; stop reading here
                break
            fields = line.split()
            if fields:
                transcripts.add(fields[0])
    return transcripts
def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str:
    """
    Work out which GTF attribute holds the transcript identifiers.

    Every non-comment GTF line has its quoted ``key "value";`` attributes
    extracted; an attribute key earns one vote per line on which its value is
    one of ``transcripts``. 'transcript_id' is returned whenever it ties for
    the highest vote count; otherwise the most-voted key is returned.

    Parameters:
    gtf_file (str): Path to the GTF file.
    transcripts (Set[str]): Transcript identifiers to look for among the attribute values.

    Returns:
    str: Name of the matching attribute, or "" if no attribute value matched.
    """
    attribute_pattern = re.compile(r'(\S+) "(.*?)(?<!\\)";')
    tally = Counter()

    with open(gtf_file) as gtf_handle:
        for record in gtf_handle:
            # Lines starting with '#' are header/comment lines
            if record.startswith("#"):
                continue
            fields = record.split("\t")
            # Column 9 carries the attributes; later duplicates of a key win
            pairs = dict(attribute_pattern.findall(fields[8]))
            for key, value in pairs.items():
                if value in transcripts:
                    tally[key] += 1

    if len(tally) == 0:
        logger.warning("No attribute in GTF matching transcripts")
        return ""

    # Prefer the conventional attribute when it is (joint) top of the vote
    top_votes = max(tally.values())
    if tally.get("transcript_id") == top_votes:
        logger.info("Attribute 'transcript_id' corresponds to transcripts.")
        return "transcript_id"

    attribute, _ = tally.most_common(1)[0]
    logger.info(f"Attribute '{attribute}' corresponds to transcripts.")
    return attribute
def parse_attributes(attributes_text: str) -> Dict[str, str]:
    """
    Parse the attributes column of a GTF file.

    Attributes are ``key value`` pairs separated by semicolons. A semicolon
    inside a double-quoted value (e.g. ``note "a; b";``) is treated as part of
    the value rather than as a separator. Double quotes are removed from the
    returned values. Fragments without a space-separated value are ignored, and
    a repeated key keeps its last value.

    :param attributes_text: The attributes column as a string.
    :return: An ordered dictionary mapping attribute keys to unquoted values.
    """
    fragments = attributes_text.strip().split(";")

    # Re-join fragments that were cut at a semicolon inside a quoted value.
    # A fragment with an odd number of double quotes opens (or closes) a quote.
    fields = []
    pending = []
    for fragment in fragments:
        odd_quotes = fragment.count('"') % 2 == 1
        if pending:
            pending.append(fragment)
            if odd_quotes:
                fields.append(";".join(pending))
                pending = []
        elif odd_quotes:
            pending = [fragment]
        else:
            fields.append(fragment)
    # An unterminated quote cannot be re-joined reliably: fall back to treating
    # the remaining fragments as separate attributes (the historical behaviour).
    fields.extend(pending)

    attr_dict = OrderedDict()
    for field in fields:
        # Split the attribute into key and value, ensuring there are two parts
        parts = field.strip().split(" ", 1)
        if len(parts) == 2:
            key, value = parts
            # Remove any double quotes from the value
            attr_dict[key] = value.replace('"', "")
    return attr_dict
def map_transcripts_to_gene(
    quant_type: str, gtf_file: str, quant_dir: str, gene_id: str, extra_id_field: str, output_file: str
) -> bool:
    """
    Write a tab-separated transcript / gene / extra-ID table derived from a GTF.

    A sample of transcript IDs is read from the quantification output and used
    to detect which GTF attribute names transcripts. Each distinct
    (transcript, gene) pair found in the GTF is then written once, in order of
    first appearance, together with the value of ``extra_id_field`` (or the
    gene ID when that attribute is absent from the line).

    Parameters:
    quant_type (str): The quantification method used (e.g., 'salmon').
    gtf_file (str): Path to the GTF file.
    quant_dir (str): Directory where quantification files are located.
    gene_id (str): The gene ID attribute in the GTF file.
    extra_id_field (str): Additional ID field in the GTF file.
    output_file (str): The output file path.

    Returns:
    bool: True if the table was written, False if no transcript attribute
    could be identified (in which case nothing is written).
    """
    # Salmon and every other quantifier use differently named result files
    if quant_type == "salmon":
        quant_pattern = "*quant_results.sf"
    else:
        quant_pattern = "*abundance.tsv"
    sampled_ids = read_top_transcripts(quant_dir, quant_pattern)

    tx_key = discover_transcript_attribute(gtf_file, sampled_ids)
    if not tx_key:
        return False

    # Pairs already emitted, so each transcript-gene combination appears once
    written_pairs = set()
    with open(gtf_file) as gtf_handle, open(output_file, "w") as out_handle:
        for record in gtf_handle:
            if record.startswith("#"):
                continue
            attrs = parse_attributes(record.split("\t")[8])
            if gene_id not in attrs or tx_key not in attrs:
                continue
            tx_value = attrs[tx_key]
            gene_value = attrs[gene_id]
            pair = (tx_value, gene_value)
            if pair in written_pairs:
                continue
            # Fall back to the gene ID when the extra attribute is missing
            label = attrs.get(extra_id_field, gene_value)
            out_handle.write(f"{tx_value}\t{gene_value}\t{label}\n")
            written_pairs.add(pair)
    return True
# Command-line entry point: parse arguments and run the mapping.
# Exits with status 1 when no mapping could be produced, so that calling
# scripts and workflow engines can detect the failure.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Map transcripts to gene names for tximport.")
    parser.add_argument("--quant_type", type=str, help="Quantification type", default="salmon")
    parser.add_argument("--gtf", type=str, help="GTF file", required=True)
    parser.add_argument("--quants", type=str, help="Output of quantification", required=True)
    parser.add_argument("--id", type=str, help="Gene ID in the GTF file", required=True)
    parser.add_argument("--extra", type=str, help="Extra ID in the GTF file")
    parser.add_argument("-o", "--output", dest="output", default="tx2gene.tsv", type=str, help="File with output")
    args = parser.parse_args()
    if not map_transcripts_to_gene(args.quant_type, args.gtf, args.quants, args.id, args.extra, args.output):
        logger.error("Failed to map transcripts to genes.")
        # Previously the script logged the error but still exited with status 0
        raise SystemExit(1)

View File

@@ -0,0 +1,258 @@
name: "tximport"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--quant_results"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--tx2gene_tsv"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quant_type"
description: "Method used for quantification"
info: null
required: false
choices:
- "salmon"
- "kallisto"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--tpm_gene"
info: null
default:
- "merged.gene_tpm.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene"
info: null
default:
- "merged.gene_counts.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_length_scaled"
info: null
default:
- "merged.gene_counts_length_scaled.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_scaled"
info: null
default:
- "merged.gene_counts_scaled.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--lengths_gene"
info: null
default:
- "merged.gene_length.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tpm_transcript"
info: null
default:
- "merged.transcript_tpm.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_transcript"
info: null
default:
- "merged.transcript_counts.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--lengths_transcript"
info: null
default:
- "merged.transcript_length.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "tximport.r"
description: "Import transcript-level quantifications with tximport and summarize them to gene level"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/tximport/main.nf"
last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "r-base"
- "libcurl4-openssl-dev"
- "libssl-dev"
- "libxml2-dev"
interactive: false
- type: "r"
cran:
- "jsonlite"
bioc:
- "SummarizedExperiment"
- "tximport"
- "tximeta"
bioc_force_install: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/tximport/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/tximport"
executable: "target/executable/tximport/tximport"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,141 @@
#!/usr/bin/env Rscript
# Script for importing and processing transcript-level quantifications.
# Written by Lorena Pantano, later modified by Jonathan Manning, and released under the MIT license.
# Loading required libraries
library(SummarizedExperiment)
library(tximport)
# Parsing command line arguments
args <- commandArgs(trailingOnly=TRUE)

# All five positional arguments are needed: tx2gene_path (the fifth) is read
# unconditionally further down, so fail early with the usage message instead
# of continuing with a missing value.
if (length(args) < 5) {
    stop("Usage: tximport.r <coldata_path> <path> <prefix> <quant_type> <tx2gene_path>",
        call.=FALSE)
}

# Assigning command line arguments to variables
coldata_path <- args[1]   # sample sheet (tab-separated); may point to a non-existent file
path <- args[2]           # directory searched recursively for quantification files
prefix <- args[3]         # prefix of every output file name
quant_type <- args[4]     # quantifier name, passed to tximport() as `type`
tx2gene_path <- args[5]   # tab-separated transcript / gene_id / gene_name table
## Functions
# Build a table from a SummarizedExperiment object.
#   se.obj: the SummarizedExperiment to export
#   slot:   name of the assay to export
# Returns the first two rowData columns bound column-wise to the chosen assay.
build_table <- function(se.obj, slot) {
cbind(rowData(se.obj)[,1:2], assays(se.obj)[[slot]])
}
# Export one assay of a SummarizedExperiment as a tab-separated file.
#   params: list with elements `obj` (the SummarizedExperiment), `slot` (assay
#           name) and `suffix` (file name suffix).
# The file is named "<prefix>.<suffix>", where `prefix` is the script-level
# variable; values are written unquoted and without row names.
write_se_table <- function(params) {
    out_path <- paste0(prefix, ".", params$suffix)
    tbl <- build_table(params$obj, params$slot)
    write.table(tbl, out_path, sep = "\t", quote = FALSE, row.names = FALSE)
}
# Read transcript metadata from a given path.
#   tinfo_path: headerless tab-separated file with columns tx, gene_id, gene_name.
# Transcripts present in the script-level `txi` object but absent from the file
# are appended with their own ID used as gene_id and gene_name; rows are then
# ordered to match rownames(txi[[1]]).
# Returns a list with:
#   transcript: the full per-transcript table (row names = tx)
#   gene:       unique gene_id / gene_name pairs
#   tx2gene:    the tx and gene_id columns
# Stops if the file does not exist or is empty.
read_transcript_info <- function(tinfo_path){
    info <- file.info(tinfo_path)
    # file.info() reports an NA size for a path that does not exist; test for
    # it explicitly so the failure is not a cryptic error from `if (NA == 0)`.
    if (is.na(info$size)) {
        stop("tx2gene file not found: ", tinfo_path)
    }
    if (info$size == 0) {
        stop("tx2gene file is empty")
    }

    transcript_info <- read.csv(tinfo_path, sep="\t", header = FALSE,
                                col.names = c("tx", "gene_id", "gene_name"))

    # Transcripts that were quantified but have no entry in the mapping file
    extra <- setdiff(rownames(txi[[1]]), as.character(transcript_info[["tx"]]))
    transcript_info <- rbind(transcript_info, data.frame(tx=extra, gene_id=extra, gene_name=extra))
    transcript_info <- transcript_info[match(rownames(txi[[1]]), transcript_info[["tx"]]), ]
    rownames(transcript_info) <- transcript_info[["tx"]]

    list(transcript = transcript_info,
        gene = unique(transcript_info[,2:3]),
        tx2gene = transcript_info[,1:2])
}
# Read and process sample/column data from a given path.
#   coldata_path: tab-separated sample sheet whose first column holds the
#                 sample names; if the file does not exist, a minimal table is
#                 built from the script-level `fns` and `names` vectors.
# Returns a data frame with a leading `files` column and row names taken from
# its `names` column.
read_coldata <- function(coldata_path){
    if (file.exists(coldata_path)) {
        coldata <- read.csv(coldata_path, sep="\t")
        # Order the sample sheet like the quantification files
        coldata <- coldata[match(names, coldata[,1]),]
        coldata <- cbind(files = fns, coldata)
    } else {
        message("ColData not available: ", coldata_path)
        coldata <- data.frame(files = fns, names = names)
    }
    rownames(coldata) <- coldata[["names"]]
    # Return the table explicitly: the value of the assignment above is the
    # row-name vector, not the data frame.
    coldata
}
# Bundle three matrices and their annotations into a SummarizedExperiment.
#   counts, abundance, length: stored as assays under those same names
#   col_data: passed as colData
#   row_data: passed as rowData
create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) {
    assay_list <- list(counts = counts, abundance = abundance, length = length)
    SummarizedExperiment(assays = assay_list, colData = col_data, rowData = row_data)
}
# Main script starts here
# Define pattern for file names based on quantification type
# (the pattern is a regular expression handed to list.files(); anything other
# than "kallisto" falls through to the *quant_results.sf pattern)
pattern <- ifelse(quant_type == "kallisto", "abundance.tsv", ".*quant_results\\.sf")
fns <- list.files(path, pattern = pattern, recursive = T, full.names = T)
# Samples are named after the base name of each quantification file
names <- basename(fns)
names(fns) <- names
# dropInfReps is switched on for kallisto input only
dropInfReps <- quant_type == "kallisto"
# Import transcript-level quantifications
txi <- tximport(fns, type = quant_type, txOut = TRUE, dropInfReps = dropInfReps)
# Read transcript and sample data
# (read_transcript_info() and read_coldata() read the globals txi, fns and
# names, so they must be called after the statements above)
transcript_info <- read_transcript_info(tx2gene_path)
coldata <- read_coldata(coldata_path)
# Create initial SummarizedExperiment object
se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]],
DataFrame(coldata), transcript_info$transcript)
# Setting parameters for writing tables
# Each entry describes one output file: object, assay name and file suffix
params <- list(
list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"),
list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"),
list(obj = se, slot = "length", suffix = "transcript_lengths.tsv")
)
# Process gene-level data if tx2gene mapping is available
if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) {
tx2gene <- transcript_info$tx2gene
# Summarize three times: default counts, lengthScaledTPM counts, scaledTPM counts
gi <- summarizeToGene(txi, tx2gene = tx2gene)
gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM")
gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM")
# Order the gene annotation like the rows of the summarized matrices
gene_info <- transcript_info$gene[match(rownames(gi[[1]]), transcript_info$gene[["gene_id"]]),]
# NOTE(review): transcript_info$gene only has the gene_id and gene_name
# columns, so gene_info[["tx"]] is NULL here and this resets the row names
# rather than naming them - possibly "gene_id" was intended; confirm.
rownames(gene_info) <- gene_info[["tx"]]
col_data_frame <- DataFrame(coldata)
# Create gene-level SummarizedExperiment objects
gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]],
col_data_frame, gene_info)
gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]],
col_data_frame, gene_info)
gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]],
col_data_frame, gene_info)
# Append the gene-level outputs to the transcript-level ones
params <- c(params, list(
list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"),
list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"),
list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"),
list(obj = gse.ls, slot = "abundance", suffix = "gene_tpm_length_scaled.tsv"),
list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"),
list(obj = gse.s, slot = "abundance", suffix = "gene_tpm_scaled.tsv"),
list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv")
))
}
# Writing tables for each set of parameters
# (files are named "<prefix>.<suffix>" by write_se_table)
done <- lapply(params, write_se_table)
# Output session information and citations
# NOTE(review): tximeta is cited but never attached with library() in this
# script - presumably it only needs to be installed; confirm.
citation("tximeta")
sessionInfo()

View File

@@ -0,0 +1,175 @@
name: "bedclip"
namespace: "ucsc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input_bedgraph"
description: "bedGraph file which should be clipped"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sizes"
description: "File with chromosome sizes"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_bedgraph"
description: "bedGraph file after clipping"
info: null
default:
- "$id.$key.bedgraph"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Remove lines from bed file that refer to off-chromosome locations"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.bedgraph"
- type: "file"
path: "genome.sizes"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/ucsc/bedclip/main.nf"
- "modules/nf-core/ucsc/bedclip/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "rsync"
- "libcurl4"
interactive: false
- type: "docker"
run:
- "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip\
\ /usr/local/bin/\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/ucsc/bedclip/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/ucsc/bedclip"
executable: "target/executable/ucsc/bedclip/bedclip"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,175 @@
name: "bedgraphtobigwig"
namespace: "ucsc"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--bedgraph"
description: "bedGraph file which should be converted"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sizes"
description: "File with chromosome sizes"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--bigwig"
description: "bigWig coverage file relative to genes on the input file"
info: null
default:
- "$id.$key.bigwig"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Convert a bedGraph file to bigWig format"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.bedgraph"
- type: "file"
path: "genome.sizes"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/ucsc/bedgraphtobigwig/main.nf"
- "modules/nf-core/ucsc/bedgraphtobigwig/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "rsync"
- "libcurl4"
interactive: false
- type: "docker"
run:
- "rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig\
\ /usr/local/bin/\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/ucsc/bedgraphtobigwig/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/ucsc/bedgraphtobigwig"
executable: "target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,206 @@
name: "umitools_dedup"
namespace: "umitools"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam"
description: "Input BAM file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "BAM index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--get_output_stats"
description: "Whether or not to generate output stats."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_bam"
description: "Deduplicated BAM file"
info: null
default:
- "$id.$key.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_stats"
description: "Directory containing UMI based deduplication statistics files"
info: null
default:
- "$id.umi_dedup.stats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\
\ to the read.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "chr19.bam"
- type: "file"
path: "chr19.bam.bai"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/umitools/dedup/main.nf"
- "modules/nf-core/umitools/dedup/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
interactive: false
- type: "python"
user: false
packages:
- "umi_tools"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/umitools/umitools_dedup/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/umitools/umitools_dedup"
executable: "target/executable/umitools/umitools_dedup/umitools_dedup"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,264 @@
name: "umitools_extract"
namespace: "umitools"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "Input fastq files, either one or two (paired)"
info: null
example:
- "sample.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ","
- type: "string"
name: "--bc_pattern"
description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\
\ first 6 nucleotides of the read are from the UMI."
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ","
- name: "Output"
arguments:
- type: "file"
name: "--fastq_1"
description: "Output file for read 1."
info: null
default:
- "$id.$key.read_1.fastq"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Output file for read 2."
info: null
default:
- "$id.$key.read_2.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Optional arguments"
arguments:
- type: "string"
name: "--umitools_extract_method"
description: "UMI pattern to use."
info: null
default:
- "string"
required: false
choices:
- "string"
- "regex"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_umi_separator"
description: "The character that separates the UMI in the read name. Most likely\
\ a colon if you skipped the extraction with UMI-tools and used other software."
info: null
default:
- "_"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_grouping_method"
description: "Method to use to determine read groups by subsuming those with similar\
\ UMIs. All methods start by identifying the reads with the same mapping position,\
\ but treat similar yet nonidentical UMIs differently."
info: null
default:
- "directional"
required: false
choices:
- "unique"
- "percentile"
- "cluster"
- "adjacency"
- "directional"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--umi_discard_read"
description: "After UMI barcode extraction discard either R1 or R2 by setting\
\ this parameter to 1 or 2, respectively."
info: null
default:
- 0
required: false
choices:
- 0
- 1
- 2
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "UMI-tools contains tools for dealing with Unique Molecular Identifiers\
\ (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See\
\ https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component\
\ flexibly removes UMI sequences from fastq reads. UMIs are removed and appended\
\ to the read name.\nThis component extracts the UMI barcode from a read and adds it\
\ to the read name, leaving any sample barcode in place\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "scrb_seq_fastq.1.gz"
- type: "file"
path: "scrb_seq_fastq.2.gz"
- type: "file"
path: "slim.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/umitools/extract/main.nf"
- "modules/nf-core/umitools/extract/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
interactive: false
- type: "python"
user: false
packages:
- "umi_tools"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/umitools/umitools_extract/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/umitools/umitools_extract"
executable: "target/executable/umitools/umitools_extract/umitools_extract"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,167 @@
name: "umitools_prepareforquant"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--bam"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "$id.transcriptome_sorted.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--log"
info: null
default:
- "$id.$key.log"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
- type: "file"
path: "prepare-for-rsem.py"
description: "Fix paired-end reads in name sorted BAM file to prepare for salmon quantification"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/local/umitools_prepareforrsem.nf"
last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d"
status: "enabled"
requirements:
commands:
- "ps"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
interactive: false
- type: "python"
user: false
packages:
- "umi_tools"
- "pysam"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/umitools_prepareforquant/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/umitools_prepareforquant"
executable: "target/executable/umitools_prepareforquant/umitools_prepareforquant"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Credits
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This script is a clone of the "prepare-for-rsem.py" script written by
Ian Sudbury, Tom Smith and other contributors to the UMI-tools package:
https://github.com/CGATOxford/UMI-tools
It has been included here to address problems encountered with
Salmon quant and RSEM as discussed in the issue below:
https://github.com/CGATOxford/UMI-tools/issues/465
When the "umi_tools prepare-for-rsem" command becomes available in an official
UMI-tools release this script will be replaced and deprecated.
Commit:
https://github.com/CGATOxford/UMI-tools/blob/bf8608d6a172c5ca0dcf33c126b4e23429177a72/umi_tools/prepare-for-rsem.py
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
prepare_for_rsem - make the output from dedup or group compatible with RSEM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The SAM format specification states that the mnext and mpos fields should point
to the primary alignment of a read's mate. However, not all aligners adhere to
this standard. In addition, the RSEM software requires that the mate of a read1
appears directly after it in its input BAM. This requires that there is exactly
one read1 alignment for every read2 and vice versa.
In general (except in a few edge cases) UMI-tools outputs only the read2 that
corresponds to the read specified in the mnext and mpos positions of a selected
read1, and only outputs this read once, even if multiple read1s point to it.
This makes UMI-tools outputs incompatible with RSEM. This script takes the output
from dedup or groups and ensures that each read1 has exactly one read2 (and vice
versa), that read2 always appears directly after read1, and that pairs point to
each other (note this is technically not valid SAM format). Copy any specified
tags from read1 to read2 if they are present (by default, UG and BX, the unique
group and correct UMI tags added by _group_)
Input must be name sorted.
https://raw.githubusercontent.com/CGATOxford/UMI-tools/master/LICENSE
"""
from umi_tools import Utilities as U
from collections import defaultdict, Counter
import pysam
import sys
# Usage banner for the command line help; main() hands it to U.OptionParser
# through the usage= keyword.
usage = """
prepare_for_rsem - make output from dedup or group compatible with RSEM
Usage: umi_tools prepare_for_rsem [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM]
note: If --stdout is omited, standard out is output. To
generate a valid BAM file on standard out, please
redirect log with --log=LOGFILE or --log2stderr """
def chunk_bam(bamfile):
    """Group consecutive reads that share a query name.

    Args:
        bamfile: Iterable of read objects exposing a ``query_name``
            attribute. Reads of one template must be adjacent, so the
            input has to be name sorted.

    Yields:
        list: The run of consecutive reads carrying the same query name,
        in input order. Nothing is yielded for an empty input, so callers
        never receive an empty chunk.
    """
    last_query_name = None
    output_buffer = list()
    for read in bamfile:
        # A change of name closes the current chunk.
        if last_query_name is not None and last_query_name != read.query_name:
            yield output_buffer
            output_buffer = list()
        last_query_name = read.query_name
        output_buffer.append(read)
    # Flush the final chunk, but only if at least one read was seen;
    # an empty list here would fail the single-name assertion in main().
    if output_buffer:
        yield output_buffer
def copy_tags(tags, read1, read2):
    """Transfer the listed tags from ``read1`` onto ``read2``.

    Args:
        tags: Iterable of tag names to transfer.
        read1: Source read; queried with ``get_tag(..., with_value_type=True)``.
        read2: Destination read; updated in place through ``set_tag``.

    Returns:
        ``read2``, the same object that was passed in.
    """
    for name in tags:
        try:
            fetched = read1.get_tag(name, with_value_type=True)
            read2.set_tag(name, value=fetched[0], value_type=fetched[1])
        except KeyError:
            # Best effort: a KeyError (presumably "tag not set on read1")
            # simply means there is nothing to copy for this tag.
            continue
    return read2
def pick_mate(read, template_dict, mate_key):
    """Select a mate for ``read`` from the alignments stored under ``mate_key``.

    The candidates are taken from the opposite read number
    (``template_dict[not read.is_read1][mate_key]``). A candidate whose
    next-reference name and start point back at ``read`` is preferred; when
    several do, the last one in the list is kept. If none points back, the
    first candidate is used. Returns ``None`` when there are no candidates.
    """
    # All alignments of the other read number at the requested position.
    candidates = template_dict[not read.is_read1][mate_key]

    chosen = None
    for candidate in candidates:
        points_back = (
            candidate.next_reference_name == read.reference_name
            and candidate.next_reference_start == read.pos
        )
        if points_back:
            chosen = candidate

    # Nothing refers back to this read: fall back to any alignment at that
    # position (happens when UMI-tools output the wrong read as the pair).
    if chosen is None and len(candidates) > 0:
        chosen = candidates[0]
    return chosen
def main(argv=None):
    """Command line entry point: pair up alignments so mates are adjacent.

    Parses the options, opens the input and output alignment files with
    pysam, and walks the input one template (set of reads sharing a query
    name) at a time. For every alignment a mate is looked up within the same
    template; each resulting pair is written once, read1 first, after the
    requested tags have been copied from read1 to read2.

    Args:
        argv: Argument vector; ``sys.argv`` is used when ``None``.

    Returns:
        None. Pairs are written to the output file and summary counts are
        reported through ``U.info``.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = U.OptionParser(version="%prog version: $Id$", usage=usage, description=globals()["__doc__"])
    group = U.OptionGroup(parser, "RSEM preparation specific options")
    group.add_option(
        "--tags",
        dest="tags",
        type="string",
        default="UG,BX",
        help="Comma-separated list of tags to transfer from read1 to read2",
    )
    group.add_option(
        "--sam", dest="sam", action="store_true", default=False, help="input and output SAM rather than BAM"
    )
    parser.add_option_group(group)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = U.Start(
        parser, argv=argv, add_group_dedup_options=False, add_umi_grouping_options=False, add_sam_options=False
    )

    skipped_stats = Counter()

    # pysam is given a file name (or "-" for the standard stream), so any
    # file object held in options.stdin is closed and only its name is kept.
    if options.stdin != sys.stdin:
        in_name = options.stdin.name
        options.stdin.close()
    else:
        in_name = "-"

    # "" selects SAM text mode, "b" selects BAM.
    if options.sam:
        mode = ""
    else:
        mode = "b"

    inbam = pysam.AlignmentFile(in_name, "r" + mode)

    # Same treatment for the output side.
    if options.stdout != sys.stdout:
        out_name = options.stdout.name
        options.stdout.close()
    else:
        out_name = "-"

    outbam = pysam.AlignmentFile(out_name, "w" + mode, template=inbam)

    options.tags = options.tags.split(",")

    for template in chunk_bam(inbam):
        assert len(set(r.query_name for r in template)) == 1

        # Index the template's alignments by read number (True = read1) and
        # by (reference, position, is-primary).
        current_template = {True: defaultdict(list), False: defaultdict(list)}
        for read in template:
            key = (read.reference_name, read.pos, not read.is_secondary)
            current_template[read.is_read1][key].append(read)

        # Pairs already written for this template, keyed by their text form.
        output = set()

        for read in template:
            mate = None

            # First look for a read that has the same primary/secondary status
            # as read (i.e. secondary mate for secondary read, and primary mate
            # for primary read)
            # NOTE(review): the index above stores ``not is_secondary`` in the
            # third key slot while this lookup passes ``is_secondary``, so the
            # first attempt appears to match the opposite status. Behaviour is
            # left unchanged here -- confirm the intended order.
            mate_key = (read.next_reference_name, read.next_reference_start, read.is_secondary)
            mate = pick_mate(read, current_template, mate_key)

            # If none was found then look for the opposite (primary mate of
            # secondary read or secondary mate of primary read)
            if mate is None:
                mate_key = (read.next_reference_name, read.next_reference_start, not read.is_secondary)
                mate = pick_mate(read, current_template, mate_key)

            # If we still don't have a mate, then there can't be one.
            if mate is None:
                skipped_stats["no_mate"] += 1
                U.warn(
                    "Alignment {} has no mate -- skipped".format(
                        "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)]))
                    )
                )
                continue

            # We may modify the read and its mate, but the originals can be
            # needed again for another pairing, so work on copies made via a
            # dict round trip.
            read = pysam.AlignedSegment().from_dict(read.to_dict(), read.header)
            mate = pysam.AlignedSegment().from_dict(mate.to_dict(), read.header)

            # Make it so that if our read is secondary, the mate is also
            # secondary. We don't make the mate primary if the read is primary
            # because we would otherwise end up with multiple primary
            # alignments.
            if read.is_secondary:
                mate.is_secondary = True

            # Where there is already one mate for each read, every pair is
            # met twice -- once from read1 and once from read2 -- so the
            # ``output`` set guards against writing the same pair again.
            if read.is_read1:
                mate = copy_tags(options.tags, read, mate)
                output_key = str(read) + str(mate)

                if output_key not in output:
                    output.add(output_key)
                    outbam.write(read)
                    outbam.write(mate)
                    skipped_stats["pairs_output"] += 1

            elif read.is_read2:
                read = copy_tags(options.tags, mate, read)
                output_key = str(mate) + str(read)

                if output_key not in output:
                    output.add(output_key)
                    outbam.write(mate)
                    outbam.write(read)
                    skipped_stats["pairs_output"] += 1

            else:
                skipped_stats["skipped_not_read_12"] += 1
                U.warn(
                    "Alignment {} is neither read1 nor read2 -- skipped".format(
                        "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)]))
                    )
                )
                continue

    if not out_name == "-":
        outbam.close()

    # The third counter must be read under the same key it is incremented
    # with ("skipped_not_read_12"); otherwise the summary always shows 0.
    U.info(
        "Total pairs output: {}, Pairs skipped - no mates: {},"
        " Pairs skipped - not read1 or 2: {}".format(
            skipped_stats["pairs_output"], skipped_stats["no_mate"], skipped_stats["skipped_not_read_12"]
        )
    )
    U.Stop()


if __name__ == "__main__":
    sys.exit(main(sys.argv))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,541 @@
name: "genome_alignment_and_quant"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "ID of the sample."
info: null
example:
- "foo"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_1"
alternatives:
- "-i"
description: "Path to the sample (or read 1 of paired end sample)."
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Path to read 2 of the sample."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity. Must be one of unstranded, forward, or\
\ reverse"
info: null
required: false
choices:
- "forward"
- "reverse"
- "unstranded"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "GTF file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcript_fasta"
description: "Fasta file of the reference transcriptome."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--star_index"
description: "STAR index directory."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--star_ignore_sjdbgtf"
description: "When using pre-built STAR indices do not re-extract and use splice\
\ junctions from the GTF file"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--seq_platform"
description: "Sequencing platform."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--seq_center"
description: "Sequencing center."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_star_align_args"
description: "Extra arguments to pass to STAR alignment command in addition to\
\ defaults defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bam_csi_index"
description: "Create a CSI index for BAM files instead of the traditional BAI\
\ index. This will be required for genomes with larger chromosome sizes."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--umi_dedup_stats"
description: "Generate output stats when running \"umi_tools dedup\"."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--with_umi"
description: "Enable UMI-based read deduplication."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--salmon_quant_libtype"
description: "Override Salmon library type inferred based on strandedness defined\
\ in meta object."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_salmon_quant_args"
description: "Extra arguments to pass to salmon quant command in addition to defaults\
\ defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_group_features"
description: "Define the attribute type used to group features in the GTF file\
\ when running Salmon."
info: null
default:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_extra_attributes"
description: "By default, the pipeline uses the gene_name field to obtain additional\
\ gene identifiers from the input GTF file when running Salmon."
info: null
default:
- "gene_name"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Declared as a long option (leading "--") like the sibling extra_*_args
# arguments; without the dashes Viash treats the argument as positional.
- type: "string"
name: "--extra_rsem_calculate_expression_args"
description: "Extra arguments to pass to rsem-calculate-expression command in\
\ addition to defaults defined by the pipeline."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aligner"
description: "Specifies the alignment algorithm to use - available options are\
\ 'star_salmon', 'star_rsem' and 'hisat2'."
info: null
default:
- "star_salmon"
required: false
choices:
- "star_salmon"
- "star_rsem"
- "hisat2"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--rsem_index"
description: "Path to directory for pre-built RSEM index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--salmon_index"
description: "Path to directory for pre-built Salmon index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--star_multiqc"
info: null
default:
- "$id.star_align.log"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_sorted"
info: null
default:
- "$id.genome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_index"
info: null
default:
- "$id.genome.bam.bai"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_stats"
info: null
default:
- "$id.genome.stats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_flagstat"
info: null
default:
- "$id.genome.flagstat"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_idxstats"
info: null
default:
- "$id.genome.idxstats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_bam"
info: null
default:
- "$id.transcriptome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_bam_index"
info: null
default:
- "$id.transcriptome.bam.bai"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_bam_stats"
info: null
default:
- "$id.transcriptome.stats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_bam_flagstat"
info: null
default:
- "$id.transcriptome.flagstat"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_bam_idxstats"
info: null
default:
- "$id.transcriptome.idxstats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--quant_out_dir"
info: null
default:
- "$id.salmon_quant"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--quant_results_file"
info: null
default:
- "$id.quant.sf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "A viash sub-workflow for genome alignment and quantification stage of\
\ nf-core/rnaseq pipeline.\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "star/star_align_reads"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "samtools/samtools_sort"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "samtools/samtools_index"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "samtools/samtools_stats"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "samtools/samtools_flagstat"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "samtools/samtools_idxstats"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "umitools/umitools_dedup"
repository:
type: "local"
- name: "umitools_prepareforquant"
repository:
type: "local"
- name: "salmon/salmon_quant"
repository:
type: "vsh"
repo: "vsh/biobox"
- name: "rsem/rsem_calculate_expression"
repository:
type: "local"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/genome_alignment_and_quant/config.vsh.yaml"
runner: "executable"
engine: "native"
output: "target/executable/workflows/genome_alignment_and_quant"
executable: "target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/nextflow/star/star_align_reads"
- "target/dependencies/vsh/vsh/biobox/nextflow/samtools/samtools_sort"
- "target/dependencies/vsh/vsh/biobox/nextflow/samtools/samtools_index"
- "target/dependencies/vsh/vsh/biobox/nextflow/samtools/samtools_stats"
- "target/dependencies/vsh/vsh/biobox/nextflow/samtools/samtools_flagstat"
- "target/dependencies/vsh/vsh/biobox/nextflow/samtools/samtools_idxstats"
- "target/nextflow/umitools/umitools_dedup"
- "target/nextflow/umitools_prepareforquant"
- "target/dependencies/vsh/vsh/biobox/nextflow/salmon/salmon_quant"
- "target/nextflow/rsem/rsem_calculate_expression"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,293 @@
name: "merge_quant_results"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--salmon_quant_results"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--kallisto_quant_results"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ","
- type: "file"
name: "--gtf"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_extra_attributes"
info: null
default:
- "gene_name"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gtf_group_features"
info: null
default:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quant_type"
description: "Quantification method used."
info: null
default:
- "salmon"
required: false
choices:
- "salmon"
- "kallisto"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--versions"
info: null
must_exist: false
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--tpm_gene"
info: null
example:
- "gene_tpm.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene"
info: null
example:
- "gene_counts.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_length_scaled"
info: null
example:
- "gene_counts_length_scaled.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_gene_scaled"
info: null
example:
- "gene_counts_scaled.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tpm_transcript"
info: null
example:
- "transcript_tpm.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--lengths_gene"
info: null
example:
- "gene_length.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_transcript"
info: null
example:
- "transcript_counts.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--lengths_transcript"
info: null
example:
- "transcript_length.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--quant_merged_summarizedexperiment"
info: null
example:
- "quant_merged_summarizedexperiment"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "A sub-workflow to merge the counts obtained from salmon quant across\
\ all samples."
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "tx2gene"
repository:
type: "local"
- name: "tximport"
repository:
type: "local"
- name: "summarizedexperiment"
repository:
type: "local"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/merge_quant_results/config.vsh.yaml"
runner: "executable"
engine: "native"
output: "target/executable/workflows/merge_quant_results"
executable: "target/executable/workflows/merge_quant_results/merge_quant_results"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
dependencies:
- "target/nextflow/tx2gene"
- "target/nextflow/tximport"
- "target/nextflow/summarizedexperiment"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

View File

@@ -0,0 +1,892 @@
#!/usr/bin/env bash
# merge_quant_results main
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
# The component may contain files which fall under a different license. The
# authors of this component should specify the license in the header of such
# files, or include a separate license file detailing the licenses of all included
# files.
set -e
# Resolve the scratch directory. An already non-empty VIASH_TEMP is kept;
# otherwise the first non-empty value among VIASH_TMPDIR, VIASH_TEMPDIR,
# VIASH_TMP, TMPDIR, TMP, TEMPDIR and TEMP is used, ending at /tmp.
: "${VIASH_TEMP:=${VIASH_TMPDIR:-${VIASH_TEMPDIR:-${VIASH_TMP:-${TMPDIR:-${TMP:-${TEMPDIR:-${TEMP:-/tmp}}}}}}}}"
# define helper functions
# ViashQuote: put single quotes around non-flag values
# $1     : unquoted string
# return : possibly quoted string, safe to re-parse by the shell
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
#   ViashQuote "it's"     # returns 'it'\''s'
function ViashQuote {
  # A single quote cannot appear inside a single-quoted word, so each one is
  # rewritten as: close quote, escaped quote, reopen quote.
  local sq="'"
  local esc="'\\''"
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # --flag=value: keep the flag untouched, quote everything after the first '='
    local flag="${1%%=*}"
    local value="${1#*=}"
    printf '%s\n' "${flag}='${value//$sq/$esc}'"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # Bare flag. printf is used instead of echo so that flags such as -n or -e
    # are printed rather than interpreted as echo options.
    printf '%s\n' "$1"
  else
    printf '%s\n' "'${1//$sq/$esc}'"
  fi
}
# ViashRemoveFlags: strip a leading '--flag=' prefix from a string
# $1     : string that may start with one or more dashes, a flag name and '='
# return : the remainder after the first '='; strings without such a prefix
#          are printed unchanged
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  # sed program: delete dashes + flag-name characters up to and including '='
  local strip_leading_flag='s/^--*[a-zA-Z0-9_\-]*=//'
  echo "$1" | sed "$strip_leading_flag"
}
# ViashSourceDir: print the directory that contains a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute, symlink-free path of the directory holding the file
# note    : the function itself runs 'cd'; call it inside a command
#           substitution to leave the caller's working directory untouched
function ViashSourceDir {
  local script_path="$1"
  # Resolve one symlink level per iteration until a real file is reached.
  while [ -h "$script_path" ]; do
    local link_dir="$( cd -P "$( dirname "$script_path" )" >/dev/null 2>&1 && pwd )"
    script_path="$(readlink "$script_path")"
    # A relative link target is relative to the directory holding the link.
    if [[ $script_path != /* ]]; then
      script_path="$link_dir/$script_path"
    fi
  done
  cd -P "$( dirname "$script_path" )" >/dev/null 2>&1 && pwd
}
# ViashFindTargetDir: print the nearest ancestor directory that holds a '.build.yaml' file
# usage   : ViashFindTargetDir 'ScriptPath'
# $1      : The location from where to start the upward search
# returns : The path of the directory containing '.build.yaml', or an empty
#           string when no ancestor of $1 contains one
function ViashFindTargetDir {
  local source="$1"
  while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do
    # A path without any '/' cannot be shortened by ${source%/*}; stop here
    # instead of looping forever on the unchanged value.
    if [[ "$source" != */* ]]; then
      source=""
      break
    fi
    source=${source%/*}
  done
  # Quoted so that whitespace and glob characters in the path survive intact.
  printf '%s\n' "$source"
}
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] ', printed only when the current
#         verbosity (VIASH_VERBOSITY) is at least $1.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  # Fall back to the notice level (5) when VIASH_VERBOSITY is unset or empty,
  # so the comparison below never sees a missing operand.
  local current_level="${VIASH_VERBOSITY:-${VIASH_LOGCODE_NOTICE:-5}}"
  if [ "$current_level" -ge "$required_level" ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}
# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
}
# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
ViashLog $VIASH_LOGCODE_ALERT alert "$@"
}
# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
}
# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
ViashLog $VIASH_LOGCODE_ERROR error "$@"
}
# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}
# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}
# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
ViashLog $VIASH_LOGCODE_INFO info "$@"
}
# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}
# find source folder of this component
# (arguments are quoted so an install path containing whitespace is passed as
# a single word)
VIASH_META_RESOURCES_DIR="$(ViashSourceDir "${BASH_SOURCE[0]}")"
# find the root of the built components & dependencies
VIASH_TARGET_DIR="$(ViashFindTargetDir "$VIASH_META_RESOURCES_DIR")"
# define meta fields
VIASH_META_NAME="merge_quant_results"
VIASH_META_FUNCTIONALITY_NAME="merge_quant_results"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: print the usage text of this executable to stdout
# The text is kept as one array element per output line and emitted in a
# single printf call.
function ViashHelp {
  local -a help_lines=(
    "merge_quant_results main"
    ""
    "A sub-workflow to merge the counts obtained from salmon quant across all"
    "samples."
    ""
    "Input:"
    " --salmon_quant_results"
    " type: file, multiple values allowed, file must exist"
    ""
    " --kallisto_quant_results"
    " type: file, multiple values allowed, file must exist"
    ""
    " --gtf"
    " type: file, file must exist"
    ""
    " --gtf_extra_attributes"
    " type: string"
    " default: gene_name"
    ""
    " --gtf_group_features"
    " type: string"
    " default: gene_id"
    ""
    " --quant_type"
    " type: string"
    " default: salmon"
    " choices: [ salmon, kallisto ]"
    " Quantification method used."
    ""
    " --versions"
    " type: file"
    ""
    "Output:"
    " --tpm_gene"
    " type: file, output, file must exist"
    " example: gene_tpm.tsv"
    ""
    " --counts_gene"
    " type: file, output, file must exist"
    " example: gene_counts.tsv"
    ""
    " --counts_gene_length_scaled"
    " type: file, output, file must exist"
    " example: gene_counts_length_scaled.tsv"
    ""
    " --counts_gene_scaled"
    " type: file, output, file must exist"
    " example: gene_counts_scaled.tsv"
    ""
    " --tpm_transcript"
    " type: file, output, file must exist"
    " example: transcript_tpm.tsv"
    ""
    " --lengths_gene"
    " type: file, output, file must exist"
    " example: gene_length.tsv"
    ""
    " --counts_transcript"
    " type: file, output, file must exist"
    " example: transcript_counts.tsv"
    ""
    " --lengths_transcript"
    " type: file, output, file must exist"
    " example: transcript_length.tsv"
    ""
    " --quant_merged_summarizedexperiment"
    " type: file, output, file must exist"
    " example: quant_merged_summarizedexperiment"
  )
  printf '%s\n' "${help_lines[@]}"
}
# initialise variables: the wrapper starts in 'run' mode on the 'native' engine
VIASH_MODE='run'
VIASH_ENGINE_ID='native'
# initialise the positional-argument accumulator (an empty string, not a bash array)
VIASH_POSITIONAL_ARGS=''
while [[ $# -gt 0 ]]; do
case "$1" in
-h|--help)
ViashHelp
exit
;;
---v|---verbose)
let "VIASH_VERBOSITY=VIASH_VERBOSITY+1"
shift 1
;;
---verbosity)
VIASH_VERBOSITY="$2"
shift 2
;;
---verbosity=*)
VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
shift 1
;;
--version)
echo "merge_quant_results main"
exit
;;
--salmon_quant_results)
if [ -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then
VIASH_PAR_SALMON_QUANT_RESULTS="$2"
else
VIASH_PAR_SALMON_QUANT_RESULTS="$VIASH_PAR_SALMON_QUANT_RESULTS,""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --salmon_quant_results. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--salmon_quant_results=*)
if [ -z "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then
VIASH_PAR_SALMON_QUANT_RESULTS=$(ViashRemoveFlags "$1")
else
VIASH_PAR_SALMON_QUANT_RESULTS="$VIASH_PAR_SALMON_QUANT_RESULTS,"$(ViashRemoveFlags "$1")
fi
shift 1
;;
--kallisto_quant_results)
if [ -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then
VIASH_PAR_KALLISTO_QUANT_RESULTS="$2"
else
VIASH_PAR_KALLISTO_QUANT_RESULTS="$VIASH_PAR_KALLISTO_QUANT_RESULTS,""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_quant_results. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--kallisto_quant_results=*)
if [ -z "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then
VIASH_PAR_KALLISTO_QUANT_RESULTS=$(ViashRemoveFlags "$1")
else
VIASH_PAR_KALLISTO_QUANT_RESULTS="$VIASH_PAR_KALLISTO_QUANT_RESULTS,"$(ViashRemoveFlags "$1")
fi
shift 1
;;
--gtf)
[ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--gtf=*)
[ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF=$(ViashRemoveFlags "$1")
shift 1
;;
--gtf_extra_attributes)
[ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF_EXTRA_ATTRIBUTES="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_extra_attributes. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--gtf_extra_attributes=*)
[ -n "$VIASH_PAR_GTF_EXTRA_ATTRIBUTES" ] && ViashError Bad arguments for option \'--gtf_extra_attributes=*\': \'$VIASH_PAR_GTF_EXTRA_ATTRIBUTES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF_EXTRA_ATTRIBUTES=$(ViashRemoveFlags "$1")
shift 1
;;
--gtf_group_features)
[ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF_GROUP_FEATURES="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf_group_features. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--gtf_group_features=*)
[ -n "$VIASH_PAR_GTF_GROUP_FEATURES" ] && ViashError Bad arguments for option \'--gtf_group_features=*\': \'$VIASH_PAR_GTF_GROUP_FEATURES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_GTF_GROUP_FEATURES=$(ViashRemoveFlags "$1")
shift 1
;;
--quant_type)
[ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_QUANT_TYPE="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_type. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--quant_type=*)
[ -n "$VIASH_PAR_QUANT_TYPE" ] && ViashError Bad arguments for option \'--quant_type=*\': \'$VIASH_PAR_QUANT_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_QUANT_TYPE=$(ViashRemoveFlags "$1")
shift 1
;;
--versions)
[ -n "$VIASH_PAR_VERSIONS" ] && ViashError Bad arguments for option \'--versions\': \'$VIASH_PAR_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_VERSIONS="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --versions. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--versions=*)
[ -n "$VIASH_PAR_VERSIONS" ] && ViashError Bad arguments for option \'--versions=*\': \'$VIASH_PAR_VERSIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_VERSIONS=$(ViashRemoveFlags "$1")
shift 1
;;
--tpm_gene)
[ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_TPM_GENE="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_gene. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--tpm_gene=*)
[ -n "$VIASH_PAR_TPM_GENE" ] && ViashError Bad arguments for option \'--tpm_gene=*\': \'$VIASH_PAR_TPM_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_TPM_GENE=$(ViashRemoveFlags "$1")
shift 1
;;
--counts_gene)
[ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--counts_gene=*)
[ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1")
shift 1
;;
--counts_gene_length_scaled)
[ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE_LENGTH_SCALED="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_length_scaled. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--counts_gene_length_scaled=*)
[ -n "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_length_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE_LENGTH_SCALED=$(ViashRemoveFlags "$1")
shift 1
;;
--counts_gene_scaled)
[ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE_SCALED="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene_scaled. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--counts_gene_scaled=*)
[ -n "$VIASH_PAR_COUNTS_GENE_SCALED" ] && ViashError Bad arguments for option \'--counts_gene_scaled=*\': \'$VIASH_PAR_COUNTS_GENE_SCALED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_GENE_SCALED=$(ViashRemoveFlags "$1")
shift 1
;;
--tpm_transcript)
[ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_TPM_TRANSCRIPT="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --tpm_transcript. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--tpm_transcript=*)
[ -n "$VIASH_PAR_TPM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--tpm_transcript=*\': \'$VIASH_PAR_TPM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_TPM_TRANSCRIPT=$(ViashRemoveFlags "$1")
shift 1
;;
--lengths_gene)
[ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_LENGTHS_GENE="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_gene. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--lengths_gene=*)
[ -n "$VIASH_PAR_LENGTHS_GENE" ] && ViashError Bad arguments for option \'--lengths_gene=*\': \'$VIASH_PAR_LENGTHS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_LENGTHS_GENE=$(ViashRemoveFlags "$1")
shift 1
;;
--counts_transcript)
[ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_TRANSCRIPT="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcript. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--counts_transcript=*)
[ -n "$VIASH_PAR_COUNTS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--counts_transcript=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COUNTS_TRANSCRIPT=$(ViashRemoveFlags "$1")
shift 1
;;
--lengths_transcript)
[ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_LENGTHS_TRANSCRIPT="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --lengths_transcript. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--lengths_transcript=*)
[ -n "$VIASH_PAR_LENGTHS_TRANSCRIPT" ] && ViashError Bad arguments for option \'--lengths_transcript=*\': \'$VIASH_PAR_LENGTHS_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_LENGTHS_TRANSCRIPT=$(ViashRemoveFlags "$1")
shift 1
;;
--quant_merged_summarizedexperiment)
[ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_merged_summarizedexperiment. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--quant_merged_summarizedexperiment=*)
[ -n "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT" ] && ViashError Bad arguments for option \'--quant_merged_summarizedexperiment=*\': \'$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT=$(ViashRemoveFlags "$1")
shift 1
;;
---engine)
VIASH_ENGINE_ID="$2"
shift 2
;;
---engine=*)
VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")"
shift 1
;;
---cpus)
[ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_META_CPUS="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
---cpus=*)
[ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_META_CPUS=$(ViashRemoveFlags "$1")
shift 1
;;
---memory)
[ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_META_MEMORY="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
---memory=*)
[ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
shift 1
;;
*) # positional arg or unknown option
# since the positional args will be eval'd, can we always quote, instead of using ViashQuote
VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'"
[[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters.
shift # past argument
;;
esac
done
# parse positional parameters
eval set -- $VIASH_POSITIONAL_ARGS
# Map the requested engine id onto an engine type; only 'native' is accepted
# here, anything else is a fatal error.
case "$VIASH_ENGINE_ID" in
  native)
    VIASH_ENGINE_TYPE='native'
    ;;
  *)
    ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: native."
    exit 1
    ;;
esac
# setting computational defaults

# helper function for parsing memory strings
#
# ViashMemoryAsBytes SIZE
#   SIZE    a non-negative integer followed by a unit, case-insensitive and
#           with optional whitespace: b, k/kb, m/mb, g/gb, t/tb, p/pb
#           (powers of 1000) or ki/kib, mi/mib, gi/gib, ti/tib, pi/pib
#           (powers of 1024).
#   stdout  the size in bytes; nothing at all when SIZE cannot be parsed.
function ViashMemoryAsBytes {
  local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'`
  local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # Force base 10: a leading zero would otherwise make bash read the
    # amount as octal ("010" -> 8, "08" -> arithmetic error).
    local number=$(( 10#${BASH_REMATCH[1]} ))
    local symbol=${BASH_REMATCH[2]}
    # keep the result local so it does not leak into the caller's scope
    local memory_b
    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1000 )) ;;
      mb|m)   memory_b=$(( number * 1000 * 1000 )) ;;
      gb|g)   memory_b=$(( number * 1000 * 1000 * 1000 )) ;;
      tb|t)   memory_b=$(( number * 1000 * 1000 * 1000 * 1000 )) ;;
      pb|p)   memory_b=$(( number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;;
      kib|ki) memory_b=$(( number * 1024 )) ;;
      mib|mi) memory_b=$(( number * 1024 * 1024 )) ;;
      gib|gi) memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tib|ti) memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pib|pi) memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}
# compute memory in different units
# Every derived value is rounded up, so a request is never under-reported in
# a coarser unit.
if [ ! -z ${VIASH_META_MEMORY+x} ]; then
  # quoted so that a value such as "2 GB" reaches the parser in one piece
  VIASH_META_MEMORY_B=`ViashMemoryAsBytes "$VIASH_META_MEMORY"`
  # do not define other variables if memory_b is an empty string
  if [ ! -z "$VIASH_META_MEMORY_B" ]; then
    VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 ))
    VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 ))
    VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 ))
    VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 ))
    VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 ))
    VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 ))
    VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 ))
    VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 ))
    VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 ))
    VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 ))
  else
    # unset memory if string is empty
    # (unset takes the variable NAME; "unset $VAR" expanded to nothing)
    unset VIASH_META_MEMORY_B
  fi
fi
# unset nproc if string is empty
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
# check whether required parameters exist
# Each guard asks "is the variable set at all?" (the +x expansion), so a
# variable that is set to an empty string still counts as provided.
[ -n "${VIASH_META_NAME+x}" ] || {
  ViashError 'name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[ -n "${VIASH_META_FUNCTIONALITY_NAME+x}" ] || {
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[ -n "${VIASH_META_RESOURCES_DIR+x}" ] || {
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[ -n "${VIASH_META_EXECUTABLE+x}" ] || {
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[ -n "${VIASH_META_CONFIG+x}" ] || {
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[ -n "${VIASH_META_TEMP_DIR+x}" ] || {
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
# filling in defaults
# ${VAR=value} assigns only when VAR is unset; a value that was explicitly
# passed as an empty string is left untouched.
: "${VIASH_PAR_GTF_EXTRA_ATTRIBUTES=gene_name}"
: "${VIASH_PAR_GTF_GROUP_FEATURES=gene_id}"
: "${VIASH_PAR_QUANT_TYPE=salmon}"
# check whether required files exist
# The two list-valued inputs are split on ',' with globbing switched off, so
# a path containing '*' or '?' is tested literally.
if [ -n "$VIASH_PAR_SALMON_QUANT_RESULTS" ]; then
  set -f
  IFS=','
  for candidate in $VIASH_PAR_SALMON_QUANT_RESULTS; do
    # the list has already been split; restore default splitting right away
    unset IFS
    [ -e "$candidate" ] && continue
    ViashError "Input file '$candidate' does not exist."
    exit 1
  done
  set +f
fi
if [ -n "$VIASH_PAR_KALLISTO_QUANT_RESULTS" ]; then
  set -f
  IFS=','
  for candidate in $VIASH_PAR_KALLISTO_QUANT_RESULTS; do
    unset IFS
    [ -e "$candidate" ] && continue
    ViashError "Input file '$candidate' does not exist."
    exit 1
  done
  set +f
fi
if [ -n "$VIASH_PAR_GTF" ] && ! [ -e "$VIASH_PAR_GTF" ]; then
  ViashError "Input file '$VIASH_PAR_GTF' does not exist."
  exit 1
fi
# check whether parameters values are of the right type
# cpus and every derived memory figure must be an optionally signed integer
# whenever they are non-empty.
if [[ -n "$VIASH_META_CPUS" && ! "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then
  ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters.
  exit 1
fi
# same order as the variables are derived: decimal units first, then binary
for viash_unit in b kb mb gb tb pb kib mib gib tib pib; do
  viash_var="VIASH_META_MEMORY_$(echo "$viash_unit" | tr '[:lower:]' '[:upper:]')"
  viash_val="${!viash_var}"
  if [[ -n "$viash_val" && ! "$viash_val" =~ ^[-+]?[0-9]+$ ]]; then
    ViashError "memory_$viash_unit" has to be a long. Use "--help" to get more information on the parameters.
    exit 1
  fi
done
unset viash_unit viash_var viash_val
# check whether the value belongs to the set of allowed choices
if [ ! -z "$VIASH_PAR_QUANT_TYPE" ]; then
  # kept for backward compatibility with anything that reads this variable
  VIASH_PAR_QUANT_TYPE_CHOICES=("salmon;kallisto")
  # Compare against each choice exactly. The earlier substring test against
  # ";salmon;kallisto;" also accepted the literal value "salmon;kallisto".
  case "$VIASH_PAR_QUANT_TYPE" in
    salmon|kallisto)
      ;;
    *)
      ViashError "--quant_type specified value of '$VIASH_PAR_QUANT_TYPE' is not in the list of allowed values. Use --help to get more information on the parameters."
      exit 1
      ;;
  esac
fi
# create parent directories of output files, if so desired
# Outputs that were not requested (empty value) are skipped.
for viash_out in \
  "$VIASH_PAR_TPM_GENE" \
  "$VIASH_PAR_COUNTS_GENE" \
  "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" \
  "$VIASH_PAR_COUNTS_GENE_SCALED" \
  "$VIASH_PAR_TPM_TRANSCRIPT" \
  "$VIASH_PAR_LENGTHS_GENE" \
  "$VIASH_PAR_COUNTS_TRANSCRIPT" \
  "$VIASH_PAR_LENGTHS_TRANSCRIPT" \
  "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT"
do
  [ -n "$viash_out" ] || continue
  if [ ! -d "$(dirname "$viash_out")" ]; then
    mkdir -p "$(dirname "$viash_out")"
  fi
done
unset viash_out
# Choose the command that will interpret the generated script. The native
# engine only knows the 'run' mode.
if [ "$VIASH_ENGINE_ID" == "native" ]; then
  case "$VIASH_MODE" in
    run)
      VIASH_CMD="bash"
      ;;
    *)
      ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'."
      exit 1
      ;;
  esac
fi
# set dependency paths
VIASH_DEP_TX2GENE="$VIASH_META_RESOURCES_DIR/../../../nextflow/tx2gene/main.nf"
VIASH_DEP_TXIMPORT="$VIASH_META_RESOURCES_DIR/../../../nextflow/tximport/main.nf"
VIASH_DEP_SUMMARIZEDEXPERIMENT="$VIASH_META_RESOURCES_DIR/../../../nextflow/summarizedexperiment/main.nf"

ViashDebug "Running command: $(echo $VIASH_CMD)"

# The outer here-document is unquoted: the variables of this script are
# expanded now, while everything escaped with a backslash is left for the
# child shell started through VIASH_CMD. The inner, quoted here-document
# carries the Nextflow workflow verbatim.
#
# mktemp creates a file without extension and the workflow is written next to
# it with a '.nf' suffix; both are removed on exit so that no placeholder file
# is left behind in the temp dir.
cat << VIASHEOF | eval $VIASH_CMD
set -e
tempbase=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-merge_quant_results-XXXXXX")
tempscript="\$tempbase.nf"
function clean_up {
  rm -f "\$tempbase" "\$tempscript"
}
function interrupt {
  echo -e "\nCTRL-C Pressed..."
  exit 1
}
trap clean_up EXIT
trap interrupt INT SIGINT
cat > "\$tempscript" << 'VIASHMAIN'
//// VIASH START
// The following code has been auto-generated by Viash.
//// VIASH END
workflow run_wf {
  take:
    input_ch
  main:
    output_ch = input_ch
      // pick the quantification results that match the chosen quant_type
      | map { id, state ->
        def quant_results = state.quant_type == 'kallisto' ? state.kallisto_quant_results : state.salmon_quant_results
        [id, state + [quant_results: quant_results]]
      }
      | tx2gene.run (
        fromState: [
          "quant_results": "quant_results",
          "gtf_extra_attributes": "gtf_extra_attributes",
          "gtf": "gtf",
          "gtf_group_features": "gtf_group_features",
          "quant_type": "quant_type"
        ],
        toState: [ "tx2gene_tsv": "tsv" ]
      )
      | tximport.run (
        fromState: [
          "quant_results": "quant_results",
          "tx2gene_tsv": "tx2gene_tsv",
          "quant_type": "quant_type"
        ],
        // NOTE(review): assumes the tximport component names its length
        // outputs length_gene / length_transcript - confirm against its config
        toState: [
          "tpm_gene": "tpm_gene",
          "counts_gene": "counts_gene",
          "counts_gene_length_scaled": "counts_gene_length_scaled",
          "counts_gene_scaled": "counts_gene_scaled",
          "tpm_transcript": "tpm_transcript",
          "counts_transcript": "counts_transcript",
          "length_gene": "length_gene",
          "length_transcript": "length_transcript"
        ]
      )
      | summarizedexperiment.run (
        fromState: [
          "tpm_gene": "tpm_gene",
          "counts_gene": "counts_gene",
          "counts_gene_length_scaled": "counts_gene_length_scaled",
          "counts_gene_scaled": "counts_gene_scaled",
          "tpm_transcript": "tpm_transcript",
          "counts_transcript": "counts_transcript",
          "tx2gene_tsv": "tx2gene_tsv"
        ],
        toState: [ "quant_merged_summarizedexperiment": "output" ]
      )
      // The final state is keyed by the output argument names of this
      // workflow. lengths_gene and lengths_transcript are declared outputs,
      // so they are filled from the length_* entries stored after tximport.
      | setState (
        [ "tpm_gene": "tpm_gene",
          "counts_gene": "counts_gene",
          "counts_gene_length_scaled": "counts_gene_length_scaled",
          "counts_gene_scaled": "counts_gene_scaled",
          "tpm_transcript": "tpm_transcript",
          "lengths_gene": "length_gene",
          "counts_transcript": "counts_transcript",
          "lengths_transcript": "length_transcript",
          "quant_merged_summarizedexperiment": "quant_merged_summarizedexperiment" ]
      )
  emit:
    output_ch
}
VIASHMAIN
nextflow run . -main-script "\$tempscript" &
wait "\$!"
VIASHEOF
# check whether required files exist
# Every output that was requested (non-empty path) must be present once the
# workflow has finished; the first missing one aborts the script.
for viash_produced in \
  "$VIASH_PAR_TPM_GENE" \
  "$VIASH_PAR_COUNTS_GENE" \
  "$VIASH_PAR_COUNTS_GENE_LENGTH_SCALED" \
  "$VIASH_PAR_COUNTS_GENE_SCALED" \
  "$VIASH_PAR_TPM_TRANSCRIPT" \
  "$VIASH_PAR_LENGTHS_GENE" \
  "$VIASH_PAR_COUNTS_TRANSCRIPT" \
  "$VIASH_PAR_LENGTHS_TRANSCRIPT" \
  "$VIASH_PAR_QUANT_MERGED_SUMMARIZEDEXPERIMENT"
do
  if [ -n "$viash_produced" ] && ! [ -e "$viash_produced" ]; then
    ViashError "Output file '$viash_produced' does not exist."
    exit 1
  fi
done
exit 0

View File

@@ -0,0 +1,513 @@
name: "post_processing"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "ID of the sample."
info: null
example:
- "foo"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\
\ or auto"
info: null
default:
- "auto"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fasta"
description: "Path to FASTA genome file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai"
description: "Path to FASTA index"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "GTF file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam"
description: "Genome BAM file"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chrom_sizes"
description: "File containing chromosome lengths"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--star_multiqc"
description: "STAR align log file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_picard_args"
description: "Extra arguments to pass to picard MarkDuplicates command in addition\
\ to defaults defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_stringtie_args"
description: "Extra arguments to pass to stringtie command in addition to defaults\
\ defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--stringtie_ignore_gtf"
description: "Perform reference-guided de novo assembly of transcripts using StringTie,\
\ i.e. don't restrict to those in GTF file."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_bedtools_args"
description: "Extra arguments to pass to bedtools genomecov command in addition\
\ to defaults defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bam_csi_index"
description: "Create a CSI index for BAM files instead of the traditional BAI\
\ index. This will be required for genomes with larger chromosome sizes."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_mapped_reads"
description: "Minimum percentage of uniquely mapped reads below which samples\
\ are removed from further processing."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--with_umi"
description: "Enable UMI-based read deduplication."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_qc"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_markduplicates"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_stringtie"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_bigwig"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--processed_genome_bam"
info: null
default:
- "$id.markdup.sorted.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# The default must differ from the one of --processed_genome_bam, otherwise
# both outputs resolve to the same path.
# NOTE(review): ".bai" assumes the traditional BAI index; with
# --bam_csi_index the index would be a ".csi" file - confirm.
- type: "file"
name: "--genome_bam_index"
info: null
default:
- "$id.markdup.sorted.bam.bai"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_stats"
info: null
default:
- "$id.markdup.sorted.bam.stats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_flagstat"
info: null
default:
- "$id.markdup.sorted.bam.flagstat"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_bam_idxstats"
info: null
default:
- "$id.markdup.sorted.bam.idxstats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--markduplicates_metrics"
info: null
default:
- "$id.markdup.sorted.MarkDuplicates.metrics.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stringtie_transcript_gtf"
info: null
default:
- "$id.stringtie.transcripts.gtf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stringtie_coverage_gtf"
info: null
default:
- "$id.stringtie.coverage.gtf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stringtie_abundance"
info: null
default:
- "$id.stringtie.gene_abundance.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stringtie_ballgown"
info: null
default:
- "$id.stringtie.ballgown"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bedgraph_forward"
info: null
default:
- "$id.forward.bedgraph"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bedgraph_reverse"
info: null
default:
- "$id.reverse.bedgraph"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bigwig_forward"
info: null
default:
- "$id.forward.bigwig"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bigwig_reverse"
info: null
default:
- "$id.reverse.bigwig"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "A viash sub-workflow for the post-processing stage of nf-core/rnaseq\
\ pipeline.\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "picard_markduplicates"
repository:
type: "local"
- name: "samtools/samtools_sort"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "samtools/samtools_index"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "samtools/samtools_stats"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "samtools/samtools_flagstat"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "samtools/samtools_idxstats"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "stringtie"
repository:
type: "local"
- name: "bedtools_genomecov"
repository:
type: "local"
- name: "ucsc/bedclip"
repository:
type: "local"
- name: "ucsc/bedgraphtobigwig"
repository:
type: "local"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "v0.1"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/post_processing/config.vsh.yaml"
runner: "executable"
engine: "native"
output: "target/executable/workflows/post_processing"
executable: "target/executable/workflows/post_processing/post_processing"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
dependencies:
- "target/nextflow/picard_markduplicates"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_sort"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_index"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_stats"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_flagstat"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/samtools/samtools_idxstats"
- "target/nextflow/stringtie"
- "target/nextflow/bedtools_genomecov"
- "target/nextflow/ucsc/bedclip"
- "target/nextflow/ucsc/bedgraphtobigwig"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,669 @@
name: "pre_processing"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Inputs"
arguments:
- type: "string"
name: "--id"
description: "ID of the sample."
info: null
example:
- "foo"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_1"
description: "Path to the sample (or read 1 of paired end sample)."
info: null
example:
- "input.fastq.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Path to read 2 of the sample."
info: null
must_exist: false
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\
\ or auto"
info: null
default:
- "auto"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_index"
description: "BBsplit index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_fasta_list"
description: "Path to comma-separated file containing a list of reference genomes\
\ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\
\ be explicitly set to \"false\". The file should contain 2 (comma separated)\
\ columns - short name and full path to reference genome(s)"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--ribo_database_manifest"
description: "Text file containing paths to fasta files (one per line) that will\
\ be used to create the database for SortMeRNA."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcript_fasta"
description: "Path to FASTA transcriptome file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "Path to GTF annotation file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--salmon_index"
description: "Path to directory containing the Salmon index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_trimmed_reads"
description: "Number of reads after trimming"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Extra pipeline options"
arguments:
- type: "boolean"
name: "--skip_qc"
description: "Skip QC steps of the workflow."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "FastQC options"
arguments:
# Boolean switch (default: false) documented as skipping the FastQC step.
- type: "boolean"
name: "--skip_fastqc"
description: "Skip FastQC step."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "UMI-tools options"
arguments:
- type: "boolean"
name: "--with_umi"
description: "Enable UMI-based read deduplication."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_umi_extract"
description: "Skip umi_tools extract step."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_extract_method"
description: "UMI pattern to use."
info: null
default:
- "string"
required: false
choices:
- "string"
- "regex"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_bc_pattern"
description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\
\ first 6 nucleotides of the read are from the UMI."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_bc_pattern2"
description: "The UMI barcode pattern to use if the UMI is located in read 2."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--umi_discard_read"
description: "After UMI barcode extraction discard either R1 or R2 by setting\
\ this parameter to 1 or 2, respectively."
info: null
default:
- 0
required: false
choices:
- 0
- 1
- 2
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_umi_separator"
description: "The character that separates the UMI in the read name. Most likely\
\ a colon if you skipped the extraction with UMI-tools and used other software."
info: null
default:
- "_"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umitools_grouping_method"
description: "Method to use to determine read groups by subsuming those with similar\
\ UMIs. All methods start by identifying the reads with the same mapping position,\
\ but treat similar yet nonidentical UMIs differently."
info: null
default:
- "directional"
required: false
choices:
- "unique"
- "percentile"
- "cluster"
- "adjacency"
- "directional"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--save_umi_intermeds"
description: "If this option is specified, intermediate FastQ and BAM files produced\
\ by UMI-tools are also saved in the results directory."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Read trimming options"
arguments:
- type: "string"
name: "--trimmer"
description: "Specify the trimming tool to use."
info: null
default:
- "trimgalore"
required: false
choices:
- "trimgalore"
- "fastp"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_trimgalore_args"
description: "Extra arguments to pass to Trim Galore! command in addition to defaults\
\ defined by the pipeline."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_trimmed_reads"
description: "Minimum number of trimmed reads below which samples are removed\
\ from further processing. Some downstream steps in the pipeline will fail if\
\ this threshold is too low."
info: null
default:
- 10000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_trimming"
description: "Skip the adapter trimming step."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--save_trimmed"
description: "Save the trimmed FastQ files in the results directory."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Alignment options"
arguments:
- type: "string"
name: "--extra_salmon_quant_args"
description: "Extra arguments to pass to salmon quant command in addition to defaults\
\ defined by the pipeline."
info: null
default:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Read filtering options"
arguments:
- type: "boolean"
name: "--skip_bbsplit"
description: "Skip BBSplit for removal of non-reference genome reads."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--remove_ribo_rna"
description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA."
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Other options"
arguments:
- type: "string"
name: "--extra_fq_subsample_args"
description: "Extra arguments to pass to fq subsample command in addition to defaults\
\ defined by the pipeline."
info: null
default:
- "--record-count 1000000 --seed 1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
# Optional file output whose default name is "$id.$key.read_1.fastq".
# NOTE(review): the description says "output directory", but the default is a
# FASTQ file name for read 1 - the description looks stale; confirm and reword.
- type: "file"
name: "--qc_output1"
description: "Path to output directory"
info: null
default:
- "$id.$key.read_1.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Optional file output whose default name is "$id.$key.read_2.fastq".
# NOTE(review): the description says "output directory", but the default is a
# FASTQ file name for read 2 - the description looks stale; confirm and reword.
- type: "file"
name: "--qc_output2"
description: "Path to output directory"
info: null
default:
- "$id.$key.read_2.fastq"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_html_1"
description: "FastQC HTML report for read 1."
info: null
default:
- "$id.read_1.fastqc.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_html_2"
description: "FastQC HTML report for read 2."
info: null
default:
- "$id.read_2.fastqc.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_zip_1"
description: "FastQC report archive for read 1."
info: null
default:
- "$id.read_1.fastqc.zip"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastqc_zip_2"
description: "FastQC report archive for read 2."
info: null
default:
- "$id.read_2.fastqc.zip"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_log_1"
info: null
default:
- "$id.read_1.trimming_report.txt"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_log_2"
info: null
default:
- "$id.read_2.trimming_report.txt"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_html_1"
info: null
default:
- "$id.read_1.trimmed_fastqc.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_html_2"
info: null
default:
- "$id.read_2.trimmed_fastqc.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_zip_1"
info: null
default:
- "$id.read_1.trimmed_fastqc.zip"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--trim_zip_2"
info: null
default:
- "$id.read_2.trimmed_fastqc.zip"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sortmerna_log"
description: "Sortmerna log file."
info: null
default:
- "$id.sortmerna.log"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--salmon_quant_output"
description: "Results from Salmon quant"
info: null
default:
- "$id.salmon_quant_output"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "fastqc"
repository:
type: "local"
- name: "umitools/umitools_extract"
repository:
type: "local"
- name: "trimgalore"
repository:
type: "local"
- name: "bbmap_bbsplit"
repository:
type: "local"
- name: "sortmerna"
repository:
type: "local"
- name: "fastp"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
- name: "fq_subsample"
repository:
type: "local"
- name: "salmon/salmon_quant"
repository:
type: "vsh"
repo: "vsh/biobox"
tag: "v0.1"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "v0.1"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/pre_processing/config.vsh.yaml"
runner: "executable"
engine: "native"
output: "target/executable/workflows/pre_processing"
executable: "target/executable/workflows/pre_processing/pre_processing"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
dependencies:
- "target/nextflow/fastqc"
- "target/nextflow/umitools/umitools_extract"
- "target/nextflow/trimgalore"
- "target/nextflow/bbmap_bbsplit"
- "target/nextflow/sortmerna"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/fastp"
- "target/nextflow/fq_subsample"
- "target/dependencies/vsh/vsh/biobox/v0.1/nextflow/salmon/salmon_quant"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,530 @@
name: "prepare_genome"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--fasta"
description: "Path to FASTA genome file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# Optional input file: GTF annotation (declared with required: false).
# NOTE(review): the description calls this *mandatory* unless "--genome" is
# given, but this workflow declares no "--genome" argument - confirm whether
# the wording was carried over from another pipeline and should be updated.
- type: "file"
name: "--gtf"
description: "Path to GTF annotation file. This parameter is *mandatory* if --genome\
\ is not specified."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gff"
description: "Path to GFF3 annotation file. Required if \"--gtf\" is not specified."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--additional_fasta"
description: "FASTA file to concatenate to genome FASTA file e.g. containing spike-in\
\ sequences."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcript_fasta"
description: "Path to FASTA transcriptome file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gene_bed"
description: "Path to BED file containing gene intervals. This will be created\
\ from the GTF file if not specified."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--splicesites"
description: "Splice sites file required for HISAT2."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--skip_bbsplit"
description: "Skip BBSplit for removal of non-reference genome reads."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_fasta_list"
description: "Path to comma-separated file containing a list of reference genomes\
\ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\
\ be explicitly set to \"false\". The file should contain 2 (comma separated)\
\ columns - short name and full path to reference genome(s)"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--star_index"
description: "Path to directory or tar.gz archive for pre-built STAR index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--rsem_index"
description: "Path to directory or tar.gz archive for pre-built RSEM index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Optional free-form string of extra options for rsem-prepare-reference.
# The name carries the "--" prefix like every other argument in this config;
# the plain name (extra_rsem_prepare_reference_args) is unchanged.
- type: "string"
name: "--extra_rsem_prepare_reference_args"
description: "Extra arguments to pass to rsem-prepare-reference command in addition\
\ to defaults defined by the pipeline."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--salmon_index"
description: "Path to directory or tar.gz archive for pre-built Salmon index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--kallisto_index"
description: "Path to directory or tar.gz archive for pre-built Kallisto index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_index"
description: "Path to directory or tar.gz archive for pre-built BBSplit index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--pseudo_aligner_kmer_size"
description: "Kmer length passed to indexing step of pseudoaligners."
info: null
default:
- 31
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--gencode"
description: "Specify if the GTF annotation is in GENCODE format."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--biotype"
description: "Biotype value to use while appending entries to GTF file when additional\
\ fasta file is provided."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--filter_gtf"
description: "Whether to filter the GTF or not?"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aligner"
description: "Specifies the alignment algorithm to use - available options are\
\ 'star_salmon', 'star_rsem' and 'hisat2'."
info: null
default:
- "star_salmon"
required: false
choices:
- "star_salmon"
- "star_rsem"
- "hisat2"
direction: "input"
multiple: false
multiple_sep: ";"
# Choice of pseudo aligner (default: "salmon"). The description lists every
# value accepted by `choices` below so the help text and validation agree.
- type: "string"
name: "--pseudo_aligner"
description: "Specifies the pseudo aligner to use - available options are 'salmon'\
\ and 'kallisto'. Runs in addition to '--aligner'."
info: null
default:
- "salmon"
required: false
choices:
- "salmon"
- "kallisto"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--fasta_uncompressed"
info: null
default:
- "reference_genome.fasta"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf_uncompressed"
info: null
default:
- "gene_annotation.gtf"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcript_fasta_uncompressed"
info: null
default:
- "transcriptome.fasta"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gene_bed_uncompressed"
info: null
default:
- "gene_annotation.bed"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--star_index_uncompressed"
description: "Path to STAR index."
info: null
default:
- "STAR_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Output location of the RSEM index (default: "RSEM_index"). The description
# follows the wording of the sibling *_index_uncompressed outputs rather than
# the text of the --rsem_index input.
- type: "file"
name: "--rsem_index_uncompressed"
description: "Path to RSEM index."
info: null
default:
- "RSEM_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--salmon_index_uncompressed"
description: "Path to Salmon index."
info: null
default:
- "Salmon_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--kallisto_index_uncompressed"
description: "Path to Kallisto index."
info: null
default:
- "Kallisto_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bbsplit_index_uncompressed"
description: "Path to BBSplit index."
info: null
default:
- "BBSplit_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chrom_sizes"
description: "File containing chromosome lengths"
info: null
default:
- "reference_genome.fasta.sizes"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai"
description: "FASTA index file"
info: null
default:
- "reference_genome.fasta.fai"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "A subworkflow for preparing all the required genome references\n"
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "gunzip"
repository:
type: "local"
- name: "gffread"
repository:
type: "vsh"
repo: "biobox"
tag: "main"
- name: "cat_additional_fasta"
repository:
type: "local"
- name: "gtf2bed"
repository:
type: "local"
- name: "preprocess_transcripts_fasta"
repository:
type: "local"
- name: "gtf_filter"
repository:
type: "local"
- name: "rsem/rsem_prepare_reference"
repository:
type: "vsh"
repo: "biobox"
tag: "main"
- name: "getchromsizes"
repository:
type: "local"
- name: "untar"
repository:
type: "vsh"
repo: "craftbox"
tag: "main"
- name: "star/star_genome_generate"
repository:
type: "vsh"
repo: "biobox"
tag: "main"
- name: "bbmap_bbsplit"
repository:
type: "local"
- name: "salmon/salmon_index"
repository:
type: "vsh"
repo: "biobox"
tag: "main"
- name: "kallisto/kallisto_index"
repository:
type: "local"
# Repository definitions (type, name, repo, tag) for the "vsh" packages
# declared here: biobox and craftbox.
# NOTE(review): both entries use the tag "main" and the short repo names
# "biobox" / "craftbox", whereas the post_processing and pre_processing
# configs in this same build use repo "vsh/biobox" with tag "v0.1" - confirm
# whether this workflow should use the same repo and tag.
repositories:
- type: "vsh"
name: "biobox"
repo: "biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "main"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/prepare_genome/config.vsh.yaml"
runner: "executable"
engine: "native"
output: "target/executable/workflows/prepare_genome"
executable: "target/executable/workflows/prepare_genome/prepare_genome"
viash_version: "0.9.0"
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
git_remote: "https://github.com/viash-hub/rnaseq"
dependencies:
- "target/nextflow/gunzip"
- "target/dependencies/vsh/vsh/biobox/main/nextflow/gffread"
- "target/nextflow/cat_additional_fasta"
- "target/nextflow/gtf2bed"
- "target/nextflow/preprocess_transcripts_fasta"
- "target/nextflow/gtf_filter"
- "target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference"
- "target/nextflow/getchromsizes"
- "target/dependencies/vsh/vsh/craftbox/main/nextflow/untar"
- "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate"
- "target/nextflow/bbmap_bbsplit"
- "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index"
- "target/nextflow/kallisto/kallisto_index"
package_config:
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
organization: "vsh"

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More