Build branch bump_viash_0_9_4 with version bump_viash_0_9_4 (be1cd83)

Build pipeline: viash-hub.biobox.bump-viash-0-9-4-275jj

Source commit: be1cd83dd6

Source message: Add PR number
This commit is contained in:
CI
2025-04-29 10:47:47 +00:00
commit e12b668142
1094 changed files with 653770 additions and 0 deletions

View File

@@ -0,0 +1,265 @@
name: "agat_convert_bed2gff"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bed"
description: "Input bed file that will be converted."
info: null
example:
- "input.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gff"
description: "Output GFF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--source"
description: "The source informs about the tool used to produce the data and is\
\ stored in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc.\
\ [default: data]\n"
info: null
example:
- "Stringtie"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--primary_tag"
description: "The primary_tag corresponds to the data type and is stored in 3rd\
\ field of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]\n"
info: null
example:
- "gene"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--inflate_off"
description: "By default we inflate the block fields (blockCount, blockSizes,\
\ blockStarts) to create subfeatures of the main feature (primary_tag). The\
\ type of subfeature created is based on the inflate_type parameter. If you\
\ do not want this inflating behaviour you can deactivate it by using the --inflate_off\
\ option.\n"
info: null
direction: "input"
- type: "string"
name: "--inflate_type"
description: "Feature type (3rd column in gff) created when inflate parameter\
\ activated [default: exon].\n"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
description: "add verbosity"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script takes a bed file as input, and will translate it into GFF format.\
\ The BED format is described in the UCSC Genome Browser FAQ (https://genome.ucsc.edu/FAQ/FAQformat.html#format1).\
\ The script converts 0-based, half-open [start-1,\
\ end) bed file to 1-based, closed [start, end] General Feature Format v3 (GFF3).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_bed2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_bed2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_bed2gff"
executable: "target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,255 @@
name: "agat_convert_embl2gff"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--embl"
description: "Input EMBL file that will be read."
info: null
example:
- "input.embl"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gff"
description: "Output GFF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "boolean_true"
name: "--emblmygff3"
description: "Means that the EMBL flat file comes from the EMBLmyGFF3 software.\
\ This is an EMBL format dedicated to submission and contains particularities\
\ to deal with. This parameter is needed to get a proper sequence id in the\
\ GFF3 from an embl made with EMBLmyGFF3.\n"
info: null
direction: "input"
- type: "string"
name: "--primary_tag"
alternatives:
- "--pt"
- "-t"
description: "List of \"primary tag\". Useful to discard or keep specific features.\
\ Multiple tags must be comma-separated.\n"
info: null
example:
- "tag1"
- "tag2"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--discard"
alternatives:
- "-d"
description: "Means that primary tags provided by the option \"primary_tag\" will\
\ be discarded.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep"
alternatives:
- "-k"
description: "Means that only primary tags provided by the option \"primary_tag\"\
\ will be kept.\n"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script takes an EMBL file as input, and will translate it in gff\
\ format.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_embl2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_embl2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_embl2gff"
executable: "target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,260 @@
name: "agat_convert_genscan2gff"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--genscan"
alternatives:
- "-g"
description: "Input GENSCAN file that will be converted."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gff"
description: "Output GFF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--source"
description: "The source informs about the tool used to produce the data and is\
\ stored in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc.\
\ [default: data]\n"
info: null
example:
- "Stringtie"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--primary_tag"
description: "The primary_tag corresponds to the data type and is stored in 3rd\
\ field of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]\n"
info: null
example:
- "gene"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--inflate_type"
description: "Feature type (3rd column in gff) created when inflate parameter\
\ activated [default: exon].\n"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
description: "add verbosity"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. The `--config` option gives you the possibility to use\
\ your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script takes a GENSCAN file as input, and will translate it in gff\n\
format. The GENSCAN format is described [here](http://genome.crg.es/courses/Bioinformatics2003_genefinding/results/genscan.html).\n\
\n**Known problem** \n\nYou must have submitted only DNA sequence, without any header!!\
\ Indeed the tool expects only DNA\nsequences and does not crash/warn if a header\
\ is submitted along the\nsequence. E.g. if you have a header \">seq\", s-e-q are\
\ seen as the first 3\nnucleotides of the sequence. Then all prediction locations\
\ are shifted\naccordingly. (checked only on the [online version](http://argonaute.mit.edu/GENSCAN.html).\
\ \nI don't know if there is the same problem elsewhere.)\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
- "GENSCAN"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_genscan2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_genscan2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_genscan2gff"
executable: "target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,216 @@
name: "agat_convert_mfannot2gff"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--mfannot"
alternatives:
- "-m"
- "-i"
description: "The mfannot input file."
info: null
example:
- "input.mfannot"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-g"
- "-o"
description: "The GFF output file."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. The `--config` option gives you the possibility to use\
\ your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Conversion utility for MFannot \"masterfile\" annotation produced by\
\ the\n[MFannot pipeline](http://megasun.bch.umontreal.ca/RNAweasel/). Reports\n\
GFF3 format.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF"
- "Mfannot"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_mfannot2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_mfannot2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_mfannot2gff"
executable: "target/executable/agat/agat_convert_mfannot2gff/agat_convert_mfannot2gff"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,258 @@
name: "agat_convert_sp_gff2gtf"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-i"
description: "Input GFF/GTF file that will be read"
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gtf"
description: "Output GTF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gtf"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--gtf_version"
description: "Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default\
\ value from AGAT config file (relax for the default config). The script option\
\ has the higher priority. \n\n * relax: all feature types are accepted. \
\ \n * GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine,\
\ start_codon, stop_codon, three_prime_utr and five_prime_utr. \n * GTF2.5\
\ (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon,\
\ stop_codon, Selenocysteine. \n * GTF2.2 (9 feature types accepted): CDS,\
\ start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon.\
\ \n * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon,\
\ 5UTR, 3UTR. \n * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon,\
\ exon. \n * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon,\
\ exon, intron. \n"
info: null
example:
- "3"
required: false
choices:
- "relax"
- "1"
- "2"
- "2.1"
- "2.2"
- "2.5"
- "3"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script aims to convert any GTF/GFF file into a proper GTF file.\
\ Full\ninformation about the format can be found here:\nhttps://agat.readthedocs.io/en/latest/gxf.html\
\ You can choose among 7\ndifferent GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax).\
\ Depending on the\nversion selected the script will filter out the features that are\
\ not\naccepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene\npseudogene)\
\ will be converted into gene feature and every level2 feature\n(e.g mRNA ncRNA)\
\ will be converted into transcript feature. Using the\n\"relax\" option you will\
\ produce a GTF-like output keeping all original\nfeature types (3rd column). No\
\ modification will occur e.g. mRNA to\ntranscript.\n\nTo be fully GTF compliant\
\ all features have a gene_id and a transcript_id\nattribute. The gene_id is a unique\
\ identifier for the genomic source of\nthe transcript, which is used to group transcripts\
\ into genes. The\ntranscript_id is a unique identifier for the predicted transcript,\
\ which\nis used to group features into transcripts.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GTF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gff2gtf"
executable: "target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,218 @@
name: "agat_convert_sp_gff2tsv"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-f"
description: "Input GTF/GFF file."
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
description: "Output tabulated (TSV) file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.tsv"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "String - Input agat config file. By default AGAT takes as input\n\
agat_config.yaml file from the working directory if any,\notherwise it takes\
\ the original agat_config.yaml shipped with\nAGAT. To get the agat_config.yaml\
\ locally type: \"agat config\n--expose\". The --config option gives you the\
\ possibility to use\nyour own AGAT config file (located elsewhere or named\n\
differently). \n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script aims to convert a GTF/GFF file into a tabulated file. Attribute\n\
tags from the 9th column become column titles.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gff2tsv.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gff2tsv"
executable: "target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,225 @@
name: "agat_convert_sp_gxf2gxf"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gxf"
alternatives:
- "-g"
- "--gtf"
- "--gff"
description: "String - Input GTF/GFF file. Compressed file with .gz extension\
\ is accepted.\n"
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "String - Output GFF file. If no output file is specified, the output\
\ will be written to STDOUT.\n"
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "String - Input agat config file. By default AGAT takes as input\
\ agat_config.yaml file from the working directory if any, otherwise it takes\
\ the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml\
\ locally type: \"agat config --expose\". The --config option gives you the\
\ possibility to use your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "This script fixes and/or standardizes any GTF/GFF file into a fully sorted\n\
GTF/GFF file. The AGAT parser removes duplicate features, fixes\nduplicated IDs,\
\ adds missing ID and/or Parent attributes, deflates\nfactorized attributes (attributes\
\ with several parents are duplicated\nwith unique IDs), adds missing features when\
\ possible (e.g. add exon if only\nCDS described, add UTR if CDS and exon described),\
\ fixes feature locations\n(e.g. check exon is embedded in the parent features mRNA,\
\ gene), etc...\n\nAll AGAT's scripts with the _sp_ prefix use the AGAT parser\
\ before\nperforming any supplementary task. So, it is not necessary to run this\n\
script prior to the use of any other _sp_ script.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gxf2gxf.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gxf2gxf"
executable: "target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,216 @@
name: "agat_sp_add_introns"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-f"
- "--ref"
- "--reffile"
description: "Input GTF/GFF file."
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gtf"
description: "Output GFF3 file."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. The `--config` option \ngives you the possibility to use\
\ your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Add intronic elements to a gtf/gff file without intron features.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GTF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_add_introns.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_sp_add_introns/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_sp_add_introns"
executable: "target/executable/agat/agat_sp_add_introns/agat_sp_add_introns"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,266 @@
name: "agat_sp_filter_feature_from_kill_list"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-f"
- "--ref"
- "--reffile"
description: "Input GFF3 file that will be read."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--kill_list"
alternatives:
- "--kl"
description: "Text file containing the kill list. One value per line."
info: null
example:
- "kill_list.txt"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
description: "Path to the output GFF file that contains filtered features. \n"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--type"
alternatives:
- "-p"
- "-l"
description: "Primary tag option, case insensitive, list. Allows specifying the\
\ feature types that \nwill be handled. \n\nYou can specify a specific feature\
\ by giving its primary tag name (column 3) as: \n\n * cds\n * Gene\n * mRNA\n\
\ \nYou can specify directly all the features of a particular\nlevel: \n\n \
\ * level2=mRNA,ncRNA,tRNA,etc \n * level3=CDS,exon,UTR,etc. \n\nBy default\
\ all features are taken into account. Filling the option with the value \"all\"\
\ will \nhave the same behaviour.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--attribute"
alternatives:
- "-a"
description: "Attribute tag to specify the attribute to analyse. Case sensitive.\
\ Default: ID\n"
info: null
example:
- "ID"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT.\nThe `--config` option gives you the possibility to use\
\ your own AGAT config file (located \nelsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
alternatives:
- "-v"
description: "Verbose option for debugging purpose."
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Remove features based on a kill list. The default behaviour is to look\
\ at the feature's ID. \nIf the feature has an ID (case insensitive) listed among\
\ the kill list it will be removed.\nRemoving a level1 or level2 feature will automatically\
\ remove all linked subfeatures, and \nremoving all children of a feature will automatically\
\ remove this feature too.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "filtering"
- "gff"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_filter_feature_from_kill_list.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_sp_filter_feature_from_kill_list/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_sp_filter_feature_from_kill_list"
executable: "target/executable/agat/agat_sp_filter_feature_from_kill_list/agat_sp_filter_feature_from_kill_list"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -0,0 +1,214 @@
name: "agat_sp_merge_annotations"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-f"
description: "Input GTF/GFF file(s).\n"
info: null
example:
- "input1.gff;input2.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
description: "Output GFF3 file where the merged annotations will be written."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. \nThe `--config` option gives you the possibility to use\
\ your own AGAT config file (located\nelsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Merge different gff annotation files into one. It uses the AGAT parser\
\ that takes care of\nduplicated names and fixes other oddities met in those files.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "merge"
- "gff"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_sp_merge_annotations/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_sp_merge_annotations"
executable: "target/executable/agat/agat_sp_merge_annotations/agat_sp_merge_annotations"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,261 @@
name: "agat_sp_statistics"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-i"
description: "Input GTF/GFF file."
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gs_fasta"
description: "Genome size directly from a fasta file to compute more statistics.\n"
info: null
example:
- "genome.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "The file where the results will be written.\n"
info: null
example:
- "output.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--plot"
alternatives:
- "-p"
- "-d"
description: "When this option is used, a histogram of the distribution of the features\
\ will be printed in PDF files.\n"
info: null
direction: "input"
- type: "integer"
name: "--gs_size"
description: "Genome size in nucleotides to compute more statistics.\n"
info: null
example:
- 1000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--verbose"
alternatives:
- "-v"
description: "Verbose option. To modify verbosity. Default is 1. 0 is quiet, 2\
\ and 3 are increasing verbosity.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. The `--config`\noption gives you the possibility to use\
\ your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script provides exhaustive statistics of a gtf/gff file.\n\nIf\
\ you have isoforms in your file, even if correct, some calculated values\nmight\
\ seem incoherent: e.g. the total mRNA length can be greater than the\ngenome size,\
\ because the lengths of all isoforms are added together. That is why, by\ndefault,\
\ we always compute the statistics twice when there are isoforms:\nonce with the\
\ isoforms, and once without (in that case we keep the longest\nisoform per locus).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "statistics"
- "gff"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_statistics.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/.*v\\.//; s/\\s.*//' | sed 's/^/AGAT: /' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_sp_statistics/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_sp_statistics"
executable: "target/executable/agat/agat_sp_statistics/agat_sp_statistics"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,257 @@
name: "agat_sq_stat_basic"
namespace: "agat"
version: "bump_viash_0_9_4"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-i"
- "--file"
- "--input"
description: "Input GTF/GFF file.\n"
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "integer"
name: "--genome_size"
alternatives:
- "-g"
description: "This input provides the genome size, which is used to calculate\
\ the percentage of the genome represented by each kind of feature type. You\
\ can provide an integer here, or pass a fasta file using the argument\
\ --genome_size_fasta. If both are provided, only the value of --genome_size\
\ will be considered.\n"
info: null
example:
- 10000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_size_fasta"
description: "This input provides the genome size, which is used to calculate\
\ the percentage of the genome represented by each kind of feature type. You\
\ can provide the genome in fasta format here, or pass the size directly\
\ as an integer using the argument --genome_size. If you provide the fasta,\
\ the genome size will be calculated on the fly. If both are provided, only\
\ the value of --genome_size will be considered.\n"
info: null
example:
- "genome.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file. The result is in tabular format.\n"
info: null
example:
- "output.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "boolean_true"
name: "--inflate"
description: "Inflate the statistics by taking into account features with\nmultiple parents.\
\ To avoid redundant information, some gff files\nfactorize identical features,\
\ e.g. one exon used in two\ndifferent isoforms will be defined only once and\
\ will have\nmultiple parents. By default the script counts such a feature only\n\
once. Using the inflate option counts the feature and\nits size as\
\ many times as there are parents.\n"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "AGAT config file. By default AGAT takes the original agat_config.yaml\
\ shipped with AGAT. The `--config` option gives you the possibility to use\
\ your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script aims to provide basic statistics of a gtf/gff file.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "gff"
- "statistics"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sq_stat_basic.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_sq_stat_basic/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_sq_stat_basic"
executable: "target/executable/agat/agat_sq_stat_basic/agat_sq_stat_basic"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,736 @@
name: "arriba"
version: "bump_viash_0_9_4"
authors:
- name: "Robrecht Cannoodt"
roles:
- "author"
- "maintainer"
info:
links:
email: "robrecht@data-intuitive.com"
github: "rcannood"
orcid: "0000-0003-3641-729X"
linkedin: "robrechtcannoodt"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Science Engineer"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Core Member"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bam"
alternatives:
- "-x"
description: "File in SAM/BAM/CRAM format with main alignments as generated by\
\ STAR\n(Aligned.out.sam). Arriba extracts candidate reads from this file.\n"
info: null
example:
- "Aligned.out.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-a"
description: "FastA file with genome sequence (assembly). The file may be gzip-compressed.\
\ An \nindex with the file extension .fai must exist only if CRAM files are\
\ processed.\n"
info: null
example:
- "assembly.fa"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gene_annotation"
alternatives:
- "-g"
description: "GTF file with gene annotation. The file may be gzip-compressed.\n"
info: null
example:
- "annotation.gtf"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--known_fusions"
alternatives:
- "-k"
description: "File containing known/recurrent fusions. Some cancer entities are\
\ often \ncharacterized by fusions between the same pair of genes. In order\
\ to boost \nsensitivity, a list of known fusions can be supplied using this\
\ parameter. The list \nmust contain two columns with the names of the fused\
\ genes, separated by tabs.\n"
info: null
example:
- "known_fusions.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--blacklist"
alternatives:
- "-b"
description: "File containing blacklisted events (recurrent artifacts and transcripts\
\ \nobserved in healthy tissue).\n"
info: null
example:
- "blacklist.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--structural_variants"
alternatives:
- "-d"
description: "Tab-separated file with coordinates of structural variants found\
\ using \nwhole-genome sequencing data. These coordinates serve to increase\
\ sensitivity \ntowards weakly expressed fusions and to eliminate fusions with\
\ low evidence. \n"
info: null
example:
- "structural_variants_from_WGS.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tags"
alternatives:
- "-t"
description: "Tab-separated file containing fusions to annotate with tags in the\
\ 'tags' column. \nThe first two columns specify the genes; the third column\
\ specifies the tag. The \nfile may be gzip-compressed. \n"
info: null
example:
- "tags.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--protein_domains"
alternatives:
- "-p"
description: "File in GFF3 format containing coordinates of the protein domains\
\ of genes. The\nprotein domains retained in a fusion are listed in the column\n\
'retained_protein_domains'. The file may be gzip-compressed.\n"
info: null
example:
- "protein_domains.gff3"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--fusions"
alternatives:
- "-o"
description: "Output file with fusions that have passed all filters.\n"
info: null
example:
- "fusions.tsv"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fusions_discarded"
alternatives:
- "-O"
description: "Output file with fusions that were discarded due to filtering. \n"
info: null
example:
- "fusions.discarded.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "long"
name: "--max_genomic_breakpoint_distance"
alternatives:
- "-D"
description: "When a file with genomic breakpoints obtained via \nwhole-genome\
\ sequencing is supplied via the --structural_variants\nparameter, this parameter\
\ determines how far a \ngenomic breakpoint may be away from a \ntranscriptomic\
\ breakpoint to consider it as a \nrelated event. For events inside genes, the\
\ \ndistance is added to the end of the gene; for \nintergenic events, the distance\
\ threshold is \napplied as is. Default: 100000.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
alternatives:
- "-s"
description: "Whether a strand-specific protocol was used for library preparation,\
\ \nand if so, the type of strandedness (auto/yes/no/reverse). When \nunstranded\
\ data is processed, the strand can sometimes be inferred from \nsplice-patterns.\
\ But in unclear situations, stranded data helps \nresolve ambiguities. Default:\
\ auto\n"
info: null
required: false
choices:
- "auto"
- "yes"
- "no"
- "reverse"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--interesting_contigs"
alternatives:
- "-i"
description: "List of interesting contigs. Fusions between genes \non other contigs\
\ are ignored. Contigs can be specified with or without the \nprefix \"chr\"\
. Asterisks (*) are treated as wild-cards. \nDefault: 1 2 3 4 5 6 7 8 9 10 11\
\ 12 13 14 15 16 17 18 19 20 21 22 X Y AC_* NC_*\n"
info: null
example:
- "1"
- "2"
- "AC_*"
- "NC_*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--viral_contigs"
alternatives:
- "-v"
description: "List of viral contigs. Asterisks (*) are treated as \nwild-cards.\n\
Default: AC_* NC_*\n"
info: null
example:
- "AC_*"
- "NC_*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--disable_filters"
alternatives:
- "-f"
description: "List of filters to disable. By default all filters are \nenabled.\
\ \n"
info: null
required: false
choices:
- "homologs"
- "low_entropy"
- "isoforms"
- "top_expressed_viral_contigs"
- "viral_contigs"
- "uninteresting_contigs"
- "non_coding_neighbors"
- "mismatches"
- "duplicates"
- "no_genomic_support"
- "genomic_support"
- "intronic"
- "end_to_end"
- "relative_support"
- "low_coverage_viral_contigs"
- "merge_adjacent"
- "mismappers"
- "multimappers"
- "same_gene"
- "long_gap"
- "internal_tandem_duplication"
- "small_insert_size"
- "read_through"
- "inconsistently_clipped"
- "intragenic_exonic"
- "marginal_read_through"
- "spliced"
- "hairpin"
- "blacklist"
- "min_support"
- "select_best"
- "in_vitro"
- "short_anchor"
- "known_fusions"
- "no_coverage"
- "homopolymer"
- "many_spliced"
direction: "input"
multiple: true
multiple_sep: ";"
- type: "double"
name: "--max_e_value"
alternatives:
- "-E"
description: "Arriba estimates the number of fusions with a given number of supporting\
\ \nreads which one would expect to see by random chance. If the expected number\
\ \nof fusions (e-value) is higher than this threshold, the fusion is \ndiscarded\
\ by the 'relative_support' filter. Note: Increasing this \nthreshold can dramatically\
\ increase the number of false positives and may \nincrease the runtime of resource-intensive\
\ steps. Fractional values are \npossible. Default: 0.300000 \n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_supporting_reads"
alternatives:
- "-S"
description: "The 'min_support' filter discards all fusions with fewer than \n\
this many supporting reads (split reads and discordant mates \ncombined). Default:\
\ 2 \n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_mismappers"
alternatives:
- "-m"
description: "When more than this fraction of supporting reads turns out to be\
\ \nmismappers, the 'mismappers' filter discards the fusion. Default: \n0.800000\n"
info: null
example:
- 0.8
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_homolog_identity"
alternatives:
- "-L"
description: "Genes with more than the given fraction of sequence identity are\
\ \nconsidered homologs and removed by the 'homologs' filter. \nDefault: 0.300000\
\ \n"
info: null
example:
- 0.3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--homopolymer_length"
alternatives:
- "-H"
description: "The 'homopolymer' filter removes breakpoints adjacent to \nhomopolymers\
\ of the given length or more. Default: 6\n"
info: null
example:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_through_distance"
alternatives:
- "-R"
description: "The 'read_through' filter removes read-through fusions \nwhere the\
\ breakpoints are less than the given distance away \nfrom each other. Default:\
\ 10000 \n"
info: null
example:
- 10000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_anchor_length"
alternatives:
- "-A"
description: "Alignment artifacts are often characterized by split reads coming\
\ \nfrom only one gene and no discordant mates. Moreover, the split \nreads\
\ only align to a short stretch in one of the genes. The \n'short_anchor' filter\
\ removes these fusions. This parameter sets \nthe threshold in bp for what\
\ the filter considers short. Default: 23 \n"
info: null
example:
- 23
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--many_spliced_events"
alternatives:
- "-M"
description: "The 'many_spliced' filter recovers fusions between genes that \n\
have at least this many spliced breakpoints. Default: 4\n"
info: null
example:
- 4
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_kmer_content"
alternatives:
- "-K"
description: "The 'low_entropy' filter removes reads with repetitive 3-mers. If\
\ \nthe 3-mers make up more than the given fraction of the sequence, then \n\
the read is discarded. Default: 0.600000 \n"
info: null
example:
- 0.6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_mismatch_pvalue"
alternatives:
- "-V"
description: "The 'mismatches' filter uses a binomial model to calculate a \n\
p-value for observing a given number of mismatches in a read. If \nthe number\
\ of mismatches is too high, the read is discarded. \nDefault: 0.010000 \n"
info: null
example:
- 0.01
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length"
alternatives:
- "-F"
description: "When paired-end data is given, the fragment length is estimated\
\ \nautomatically and this parameter has no effect. But when single-end \ndata\
\ is given, the mean fragment length should be specified to \neffectively filter\
\ fusions that arise from hairpin structures. \nDefault: 200 \n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_reads"
alternatives:
- "-U"
description: "Subsample fusions with more than the given number of supporting\
\ reads. This \nimproves performance without compromising sensitivity, as long\
\ as the \nthreshold is high. Counting of supporting reads beyond the threshold\
\ is \ninaccurate, obviously. Default: 300 \n"
info: null
example:
- 300
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--quantile"
alternatives:
- "-Q"
description: "Highly expressed genes are prone to produce artifacts during library\
\ \npreparation. Genes with an expression above the given quantile are eligible\
\ \nfor filtering by the 'in_vitro' filter. Default: 0.998000\n"
info: null
example:
- 0.998
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--exonic_fraction"
alternatives:
- "-e"
description: "The breakpoints of false-positive predictions of intragenic events\
\ \nare often both in exons. True predictions are more likely to have at \n\
least one breakpoint in an intron, because introns are larger. If the \nfraction\
\ of exonic sequence between two breakpoints is smaller than \nthe given fraction,\
\ the 'intragenic_exonic' filter discards the \nevent. Default: 0.330000 \n"
info: null
example:
- 0.33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--top_n"
alternatives:
- "-T"
description: "Only report viral integration sites of the top N most highly expressed\
\ viral \ncontigs. Default: 5\n"
info: null
example:
- 5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--covered_fraction"
alternatives:
- "-C"
description: "Ignore virally associated events if the virus is not fully \nexpressed,\
\ i.e., less than the given fraction of the viral contig is \ntranscribed. Default:\
\ 0.050000 \n"
info: null
example:
- 0.05
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_itd_length"
alternatives:
- "-l"
description: "Maximum length of internal tandem duplications. Note: Increasing\
\ \nthis value beyond the default can impair performance and lead to many \n\
false positives. Default: 100 \n"
info: null
example:
- 100
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--min_itd_allele_fraction"
alternatives:
- "-z"
description: "Required fraction of supporting reads to report an internal \ntandem\
\ duplication. Default: 0.070000 \n"
info: null
example:
- 0.07
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_itd_supporting_reads"
alternatives:
- "-Z"
description: "Required absolute number of supporting reads to report an \ninternal\
\ tandem duplication. Default: 10 \n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--skip_duplicate_marking"
alternatives:
- "-u"
description: "Instead of performing duplicate marking itself, Arriba relies on\
\ duplicate marking by a \npreceding program using the BAM_FDUP flag. This makes\
\ sense when unique molecular \nidentifiers (UMI) are used.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--extra_information"
alternatives:
- "-X"
description: "To reduce the runtime and file size, by default, the columns 'fusion_transcript',\
\ \n'peptide_sequence', and 'read_identifiers' are left empty in the file containing\
\ \ndiscarded fusion candidates (see parameter -O). When this flag is set, this\
\ extra \ninformation is reported in the discarded fusions file.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--fill_gaps"
alternatives:
- "-I"
description: "If assembly of the fusion transcript sequence from the supporting\
\ reads is incomplete \n(denoted as '...'), fill the gaps using the assembly\
\ sequence wherever possible. \n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Detect gene fusions from RNA-Seq data"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
cpus: 1
commands:
- "ps"
keywords:
- "Gene fusion"
- "RNA-Seq"
license: "MIT"
references:
doi:
- "10.1101/gr.257246.119"
links:
repository: "https://github.com/suhrig/arriba"
homepage: "https://arriba.readthedocs.io/en/latest/"
documentation: "https://arriba.readthedocs.io/en/latest/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/arriba:2.4.0--h0033a41_2"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\\s\\(.*\\)/arriba: \"\\\
1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/arriba/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/arriba"
executable: "target/executable/arriba/arriba"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

2366
target/executable/arriba/arriba Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,424 @@
name: "bases2fastq"
version: "bump_viash_0_9_4"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--analysis_directory"
description: "Location of analysis directory"
info: null
example:
- "input"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--run_manifest"
alternatives:
- "-r"
description: "Location of run manifest to use instead of default RunManifest.csv\
\ found in analysis directory"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_directory"
alternatives:
- "-o"
description: "Location to save output fastqs"
info: null
example:
- "fastq_dir"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--report"
description: "Output location for the HTML report"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--logs"
description: "Directory containing log files"
info: null
example:
- "logs_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--chemistry_version"
description: "Run parameters override, chemistry version."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--demux_only"
alternatives:
- "-d"
description: "Generate demux files and indexing stats without generating FASTQ\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--detect_adapters"
description: "Detect adapter sequences, overriding any sequences present in run\
\ manifest.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--error_on_missing"
description: "Terminate execution for a missing file (by default, missing files\
\ are\nskipped and execution continues). Also set by --strict.\n"
info: null
direction: "input"
- type: "string"
name: "--exclude_tile"
alternatives:
- "-e"
description: "Regex matching tile names to exclude. This flag can be specified\
\ multiple times. (e.g. L1.*C0[23]S.)\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--filter_mask"
description: "Run parameters override, custom pass filter mask.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--flowcell_id"
description: "Run parameters override, flowcell ID.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--force_index_orientation"
description: "Do not attempt to find orientation for I1/I2 reads (reverse complement).\n\
Use orientation given in run manifest.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--group_fastq"
description: "Group all FASTQ/stats/metrics for a project in the project folder.\n"
info: null
direction: "input"
- type: "integer"
name: "--i1_cycles"
description: "Run parameters override, I1 cycles.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--i2_cycles"
description: "Run parameters override, I2 cycles.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--include_tile"
alternatives:
- "-i"
description: "Regex matching tile names to include. This flag\ncan be specified\
\ multiple times. (e.g. L1.*C0[23]S.)\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--kit_configuration"
description: "Run parameters override, kit configuration.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--legacy_fastq"
description: "Legacy naming for FASTQ files (e.g. SampleName_S1_L001_R1_001.fastq.gz)\n"
info: null
direction: "input"
- type: "string"
name: "--log_level"
alternatives:
- "-l"
description: "Severity level for logging.\n"
info: null
example:
- "INFO"
required: false
choices:
- "DEBUG"
- "INFO"
- "WARNING"
- "ERROR"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_error_on_invalid"
description: "Skip invalid files and continue execution. Overridden by the --strict\
\ option.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_projects"
description: "Disable project directories\n"
info: null
direction: "input"
- type: "integer"
name: "--num_unassigned"
description: "Maximum number of unassigned sequences to report.\n"
info: null
example:
- 30
required: false
min: 0
max: 1000
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--preparation_workflow"
description: "Run parameters override, preparation workflow. \n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--qc_only"
description: "Quickly generate run stats for single tile without generating FASTQ.\n\
Use --include_tile/--exclude_tile to define custom tile set.\n"
info: null
direction: "input"
- type: "integer"
name: "--r1_cycles"
description: "Run parameters override, R1 cycles.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--r2_cycles"
description: "Run parameters override, R2 cycles.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--split_lanes"
description: "Split FASTQ files by lane.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--strict"
description: "In strict mode any invalid or missing input file will terminate\
\ execution \n(overrides --no_error_on_invalid and sets --error_on_missing)\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Bases2Fastq demultiplexes sequencing data generated by Element Biosciences\
\ instruments and converts base calls into FASTQ files.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "demultiplex"
- "fastq"
- "demux"
- "Element Biosciences"
license: "Proprietary"
links:
repository: "https://github.com/viash-hub/biobox"
documentation: "https://docs.elembio.io/docs/bases2fastq/introduction/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "elembio/bases2fastq:2.1.0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "tree"
interactive: false
- type: "docker"
run:
- "echo \"bases2fastq: $(bases2fastq --version | cut -d' ' -f3)\" > /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "curl"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bases2fastq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bases2fastq"
executable: "target/executable/bases2fastq/bases2fastq"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,398 @@
name: "bbmap_bbsplit"
namespace: "bbmap"
version: "bump_viash_0_9_4"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "Sample ID"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--paired"
description: "Paired fastq files or not?"
info: null
direction: "input"
- type: "file"
name: "--input"
description: "Input fastq files, either one or two (paired), separated by \";\"\
."
info: null
example:
- "reads.fastq"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--ref"
description: "Reference FASTA files, separated by \";\". The primary reference\
\ should be specified first."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--only_build_index"
description: "If set, only builds the index. Otherwise, mapping is performed."
info: null
direction: "input"
- type: "file"
name: "--build"
description: "Index to be used for mapping. \n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--qin"
description: "Set to 33 or 64 to specify input quality value ASCII offset. Automatically\
\ detected if\nnot specified.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--interleaved"
description: "True forces paired/interleaved input; false forces single-ended\
\ mapping.\nIf not specified, interleaved status will be autodetected from read\
\ names.\n"
info: null
direction: "input"
- type: "integer"
name: "--maxindel"
description: "Don't look for indels longer than this. Lower is faster. Set to\
\ >=100k for RNA-seq.\n"
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--minratio"
description: "Fraction of max alignment score required to keep a site. Higher\
\ is faster.\n"
info: null
example:
- 0.56
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--minhits"
description: "Minimum number of seed hits required for candidate sites. Higher\
\ is faster.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--ambiguous"
description: "Set behavior on ambiguously-mapped reads (with multiple top-scoring\
\ mapping locations).\n * best Use the first best site (Default)\n * toss\
\ Consider unmapped\n * random Select one top-scoring site randomly\n \
\ * all Retain all top-scoring sites. Does not work yet with SAM output\n"
info: null
example:
- "best"
required: false
choices:
- "best"
- "toss"
- "random"
- "all"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--ambiguous2"
description: "Set behavior only for reads that map ambiguously to multiple different\
\ references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\n\
Ambiguous2 excludes reads that map ambiguously within a single reference.\n\
\ * best Use the first best site (Default)\n * toss Consider unmapped\n\
\ * all Write a copy to the output for each reference to which it maps\n\
\ * split Write a copy to the AMBIGUOUS_ output for each reference to which\
\ it maps\n"
info: null
example:
- "best"
required: false
choices:
- "best"
- "toss"
- "all"
- "split"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--qtrim"
description: "Quality-trim ends to Q5 before mapping. Options are 'l' (left),\
\ 'r' (right), and 'lr' (both).\n"
info: null
required: false
choices:
- "l"
- "r"
- "lr"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--untrim"
description: "Undo trimming after mapping. Untrimmed bases will be soft-clipped\
\ in cigar strings."
info: null
direction: "input"
- name: "Output"
arguments:
- type: "file"
name: "--index"
description: "Location to write the index.\n"
info: null
example:
- "BBSplit_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_1"
description: "Output file for read 1.\n"
info: null
example:
- "read_out1.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Output file for read 2.\n"
info: null
example:
- "read_out2.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sam2bam"
alternatives:
- "--bs"
description: "Write a shell script to 'file' that will turn the sam output into\
\ a sorted, indexed bam file.\n"
info: null
example:
- "script.sh"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--scafstats"
description: "Write statistics on how many reads mapped to which scaffold to this\
\ file.\n"
info: null
example:
- "scaffold_stats.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refstats"
description: "Write statistics on how many reads were assigned to which reference\
\ to this file.\nUnmapped reads whose mate mapped to a reference are considered\
\ assigned and will be counted.\n"
info: null
example:
- "reference_stats.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--nzo"
description: "Only print lines with nonzero coverage."
info: null
direction: "input"
- type: "string"
name: "--bbmap_args"
description: "Additional arguments from BBMap to pass to BBSplit.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Split sequencing reads by mapping them to multiple references simultaneously."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
license: "BBTools Copyright (c) 2014"
links:
repository: "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh"
homepage: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/"
documentation: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\
\ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\
\ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n"
- type: "docker"
run:
- "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \"BBMAP:\", $NF}' >\
\ /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bbmap/bbmap_bbsplit/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bbmap/bbmap_bbsplit"
executable: "target/executable/bbmap/bbmap_bbsplit/bbmap_bbsplit"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,499 @@
name: "bcftools_annotate"
namespace: "bcftools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input VCF/BCF file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output annotated file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
description: "For examples on how to use bcftools annotate see http://samtools.github.io/bcftools/howtos/annotate.html.\n\
For more details on the options see https://samtools.github.io/bcftools/bcftools.html#annotate.\n"
arguments:
- type: "file"
name: "--annotations"
alternatives:
- "--a"
description: "VCF file or tabix-indexed FILE with annotations: CHR\\tPOS[\\tVALUE]+\
\ . \n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--columns"
alternatives:
- "--c"
description: "List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG.\
\ \nSee man page for details.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--columns_file"
alternatives:
- "--C"
description: "Read -c columns from FILE, one name per row, with optional --merge_logic\
\ TYPE: NAME[ TYPE].\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--exclude"
alternatives:
- "--e"
description: "Exclude sites for which the expression is true.\nSee https://samtools.github.io/bcftools/bcftools.html#expressions\
\ for details.\n"
info: null
example:
- "QUAL >= 30 && DP >= 10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--force"
description: "Continue even when parsing errors, such as undefined tags, are encountered.\
\ \nNote this can be an unsafe operation and can result in corrupted BCF files.\
\ \nIf this option is used, make sure to sanity check the result thoroughly.\n"
info: null
direction: "input"
- type: "string"
name: "--header_line"
alternatives:
- "--H"
description: "Header line which should be appended to the VCF header, can be given\
\ multiple times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--header_lines"
alternatives:
- "--h"
description: "File with header lines to append to the VCF header.\nFor example:\n\
\ ##INFO=<ID=NUMERIC_TAG,Number=1,Type=Integer,Description=\"Example header\
\ line\">\n ##INFO=<ID=STRING_TAG,Number=1,Type=String,Description=\"Yet another\
\ header line\">\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--set_id"
alternatives:
- "--I"
description: "Set ID column using a `bcftools query`-like expression, see man\
\ page for details.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--include"
description: "Select sites for which the expression is true.\nSee https://samtools.github.io/bcftools/bcftools.html#expressions\
\ for details.\n"
info: null
example:
- "QUAL >= 30 && DP >= 10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--keep_sites"
alternatives:
- "--k"
description: "Leave --include/--exclude sites unchanged instead of discarding\
\ them.\n"
info: null
direction: "input"
- type: "string"
name: "--merge_logic"
alternatives:
- "--l"
description: "When multiple regions overlap a single record, this option defines\
\ how to treat multiple annotation values.\nSee man page for more details.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mark_sites"
alternatives:
- "--m"
description: "Annotate sites which are present (\"+\") or absent (\"-\") in the\
\ -a file with a new INFO/TAG flag.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--min_overlap"
description: "Minimum overlap required as a fraction of the variant in the annotation\
\ -a file (ANN), \nin the target VCF file (:VCF), or both for reciprocal overlap\
\ (ANN:VCF). \nBy default overlaps of arbitrary length are sufficient. \nThe\
\ option can be used only with the tab-delimited annotation -a file and with\
\ BEG and END columns present.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_version"
description: "Do not append version and command line information to the output\
\ VCF header.\n"
info: null
direction: "input"
- type: "string"
name: "--output_type"
alternatives:
- "--O"
description: "Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed\
\ BCF\n v: uncompressed VCF\n"
info: null
required: false
choices:
- "u"
- "z"
- "b"
- "v"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--pair_logic"
description: "Controls how to match records from the annotation file to the target\
\ VCF. \nEffective only when -a is a VCF or BCF file. \nThe option replaces\
\ the former unintuitive --collapse. \nSee Common Options for more.\n"
info: null
required: false
choices:
- "snps"
- "indels"
- "both"
- "all"
- "some"
- "exact"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--regions"
alternatives:
- "--r"
description: "Restrict to comma-separated list of regions. \nFollowing formats\
\ are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…].\n"
info: null
example:
- "20:1000000-2000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--regions_file"
alternatives:
- "--R"
description: "Restrict to regions listed in a file. \nRegions can be specified\
\ either on a VCF, BED, or tab-delimited file (the default). \nFor more information\
\ check manual.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--regions_overlap"
description: "This option controls how overlapping records are determined: \n\
set to 'pos' or '0' if the VCF record has to have POS inside a region (this\
\ corresponds to the default behavior of -t/-T); \nset to 'record' or '1' if\
\ also overlapping records with POS outside a region should be included (this\
\ is the default behavior of -r/-R, \nand includes indels with POS at the end\
\ of a region, which are technically outside the region); \nor set to 'variant'\
\ or '2' to include only true overlapping variation (compare the full VCF representation\
\ \"TA>T-\" vs the true sequence variation \"A>-\").\n"
info: null
required: false
choices:
- "pos"
- "record"
- "variant"
- "0"
- "1"
- "2"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--rename_annotations"
description: "Rename annotations: TYPE/old\\tnew, where TYPE is one of FILTER,INFO,FORMAT.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--rename_chromosomes"
description: "Rename chromosomes according to the map in file, with \"old_name\
\ new_name\\n\" pairs \nseparated by whitespaces, each on a separate line.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--samples"
description: "Subset of samples to annotate.\nSee also https://samtools.github.io/bcftools/bcftools.html#common_options.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--samples_file"
description: "Subset of samples to annotate in file format.\nSee also https://samtools.github.io/bcftools/bcftools.html#common_options.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--single_overlaps"
description: "Use this option to keep memory requirements low with very large\
\ annotation files. \nNote, however, that this comes at a cost, only single\
\ overlapping intervals are considered in this mode. \nThis was the default\
\ mode until the commit af6f0c9 (Feb 24 2019).\n"
info: null
direction: "input"
- type: "string"
name: "--remove"
alternatives:
- "--x"
description: "List of annotations to remove. \nUse \"FILTER\" to remove all filters\
\ or \"FILTER/SomeFilter\" to remove a specific filter. \nSimilarly, \"INFO\"\
\ can be used to remove all INFO tags and \"FORMAT\" to remove all FORMAT tags\
\ except GT. \nTo remove all INFO tags except \"FOO\" and \"BAR\", use \"^INFO/FOO,INFO/BAR\"\
\ (and similarly for FORMAT and FILTER). \n\"INFO\" can be abbreviated to \"\
INF\" and \"FORMAT\" to \"FMT\".\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Add or remove annotations from a VCF/BCF file.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Annotate"
- "VCF"
- "BCF"
license: "MIT/Expat, GNU"
references:
doi:
- "https://doi.org/10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/bcftools"
homepage: "https://samtools.github.io/bcftools/"
documentation: "https://samtools.github.io/bcftools/bcftools.html#annotate"
issue_tracker: "https://github.com/samtools/bcftools/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bcftools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
\ //p')\\\"\" > /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "tabix"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bcftools/bcftools_annotate/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bcftools/bcftools_annotate"
executable: "target/executable/bcftools/bcftools_annotate/bcftools_annotate"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,365 @@
# Identity of the component: its name, namespace and the version it was built as.
name: "bcftools_concat"
namespace: "bcftools"
version: "bump_viash_0_9_4"
# People credited for this component, with their roles, contact links and
# affiliation.
authors:
- name: "Theodoro Gasperin Terra Camargo"
  roles:
  - "author"
  info:
    links:
      email: "theodorogtc@gmail.com"
      github: "tgaspe"
      linkedin: "theodoro-gasperin-terra-camargo"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Bioinformatician"
# Command-line arguments of the component, organised into named groups.
argument_groups:
# Input files: passed directly (--input, repeatable) or listed in a file
# (--file_list). Neither is marked as required here.
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    alternatives:
    - "-i"
    description: "Input VCF/BCF files to concatenate."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--file_list"
    alternatives:
    - "-f"
    description: "Read the list of VCF/BCF files from a file, one file name per line."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# The single, mandatory output file.
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--output"
    alternatives:
    - "-o"
    description: "Output concatenated VCF/BCF file."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Optional switches and settings.
- name: "Options"
  arguments:
  - type: "boolean_true"
    name: "--allow_overlaps"
    alternatives:
    - "-a"
    description: "First coordinate of the next file can precede last record of the\
      \ current file.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--compact_PS"
    alternatives:
    - "-c"
    description: "Do not output PS tag at each site, only at the start of a new phase\
      \ set block.\n"
    info: null
    direction: "input"
  # The placeholder in the help text lists every value accepted by `choices`,
  # including "none".
  - type: "string"
    name: "--remove_duplicates"
    alternatives:
    - "-d"
    description: "Output duplicate records present in multiple files only once: <snps|indels|both|all|exact|none>.\n"
    info: null
    required: false
    choices:
    - "snps"
    - "indels"
    - "both"
    - "all"
    - "exact"
    - "none"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--ligate"
    alternatives:
    - "-l"
    description: "Ligate phased VCFs by matching phase at overlapping haplotypes."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--ligate_force"
    description: "Ligate even non-overlapping chunks, keep all sites."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--ligate_warn"
    description: "Drop sites in imperfect overlaps."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--no_version"
    description: "Do not append version and command line information to the header."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--naive"
    alternatives:
    - "-n"
    description: "Concatenate files without recompression, a header compatibility\
      \ check is performed."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--naive_force"
    description: "Same as --naive, but header compatibility is not checked. \nDangerous,\
      \ use with caution.\n"
    info: null
    direction: "input"
  - type: "string"
    name: "--output_type"
    alternatives:
    - "-O"
    description: "Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed\
      \ BCF\n v: uncompressed VCF\n"
    info: null
    required: false
    choices:
    - "u"
    - "z"
    - "b"
    - "v"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_PQ"
    alternatives:
    - "-q"
    description: "Break phase set if phasing quality is lower than <int>."
    info: null
    example:
    - 30
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # The trailing "[,...]" in the help text means further comma-separated
  # regions may follow.
  - type: "string"
    name: "--regions"
    alternatives:
    - "-r"
    description: "Restrict to comma-separated list of regions. \nFollowing formats\
      \ are supported: chr|chr:pos|chr:beg-end|chr:beg-[,...].\n"
    info: null
    example:
    - "20:1000000-2000000"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--regions_file"
    alternatives:
    - "-R"
    description: "Restrict to regions listed in a file. \nRegions can be specified\
      \ either on a VCF, BED, or tab-delimited file (the default). \nFor more information\
      \ check manual.\n"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Accepts either the symbolic names or their numeric equivalents 0/1/2.
  - type: "string"
    name: "--regions_overlap"
    description: "This option controls how overlapping records are determined: \n\
      set to 'pos' or '0' if the VCF record has to have POS inside a region (this\
      \ corresponds to the default behavior of -t/-T); \nset to 'record' or '1' if\
      \ also overlapping records with POS outside a region should be included (this\
      \ is the default behavior of -r/-R, \nand includes indels with POS at the end\
      \ of a region, which are technically outside the region); \nor set to 'variant'\
      \ or '2' to include only true overlapping variation (compare the full VCF representation\
      \ \"TA>T-\" vs the true sequence variation \"A>-\").\n"
    info: null
    required: false
    choices:
    - "pos"
    - "record"
    - "variant"
    - "0"
    - "1"
    - "2"
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
# Help text shown for the component as a whole.
description: "Concatenate or combine VCF/BCF files. All source files must have the\
  \ same sample\ncolumns appearing in the same order. The program can be used, for\
  \ example, to\nconcatenate chromosome VCFs into one VCF, or combine a SNP VCF and\
  \ an indel\nVCF into one. The input files must be sorted by chr and position. The\
  \ files\nmust be given in the correct order to produce sorted VCF on output unless\n\
  the -a, --allow-overlaps option is specified. With the --naive option, the files\n\
  are concatenated without being recompressed, which is very fast.\n"
# Script used to test the component.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
# Commands that must be available at run time.
requirements:
  commands:
  - "ps"
keywords:
- "Concatenate"
- "VCF"
- "BCF"
# Licence, citation and project links of the wrapped tool (bcftools).
license: "MIT/Expat, GNU"
references:
  doi:
  - "https://doi.org/10.1093/gigascience/giab008"
links:
  repository: "https://github.com/samtools/bcftools"
  homepage: "https://samtools.github.io/bcftools/"
  documentation: "https://samtools.github.io/bcftools/bcftools.html#concat"
  issue_tracker: "https://github.com/samtools/bcftools/issues"
# Runners: a plain executable runner and a Nextflow runner.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Each label name maps to a resource directive string.
    labels:
      # Memory in decimal units (gb/tb = powers of ten), expressed in bytes.
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (gib/tib = powers of two), expressed in bytes.
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Engines: a Docker image built on debian:stable-slim, plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "debian:stable-slim"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  # Packages installed into the image.
  - type: "apt"
    packages:
    - "bcftools"
    - "procps"
    interactive: false
  # Writes the bcftools version string to /var/software_versions.txt.
  - type: "docker"
    run:
    - "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
      \ //p')\\\"\" > /var/software_versions.txt\n"
  # Extra package listed under test_setup only.
  test_setup:
  - type: "apt"
    packages:
    - "tabix"
    interactive: false
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Provenance of this build: source config, runner/engine, output paths, Viash
# version and the git state it was built from.
build_info:
  config: "src/bcftools/bcftools_concat/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/bcftools/bcftools_concat"
  executable: "target/executable/bcftools/bcftools_concat/bcftools_concat"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the package (biobox) this component belongs to.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  # Config modification expressions recorded for the package.
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,446 @@
# Identity of the component: its name, namespace and the version it was built as.
name: "bcftools_norm"
namespace: "bcftools"
version: "bump_viash_0_9_4"
# People credited for this component, with their roles, contact links and
# affiliation.
authors:
- name: "Theodoro Gasperin Terra Camargo"
  roles:
  - "author"
  info:
    links:
      email: "theodorogtc@gmail.com"
      github: "tgaspe"
      linkedin: "theodoro-gasperin-terra-camargo"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Bioinformatician"
# Command-line arguments of the component, organised into named groups.
argument_groups:
# The single, mandatory input file.
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    alternatives:
    - "-i"
    description: "Input VCF/BCF file."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
# The single, mandatory output file.
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--output"
    alternatives:
    - "-o"
    description: "Output normalized VCF/BCF file."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Optional switches and settings.
- name: "Options"
  arguments:
  - type: "boolean_true"
    name: "--atomize"
    alternatives:
    - "-a"
    description: "Decompose complex variants (e.g., MNVs become consecutive SNVs).\n"
    info: null
    direction: "input"
  - type: "string"
    name: "--atom_overlaps"
    description: "Use the star allele (*) for overlapping alleles or set to missing\
      \ (.).\n"
    info: null
    required: false
    choices:
    - "."
    - "*"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--check_ref"
    alternatives:
    - "-c"
    description: "Check REF alleles and exit (e), warn (w), exclude (x), or set (s)\
      \ bad sites.\n"
    info: null
    required: false
    choices:
    - "e"
    - "w"
    - "x"
    - "s"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--remove_duplicates"
    alternatives:
    - "-d"
    description: "Remove duplicate snps, indels, both, all, exact matches, or none\
      \ (old -D option)."
    info: null
    required: false
    choices:
    - "snps"
    - "indels"
    - "both"
    - "all"
    - "exact"
    - "none"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fasta_ref"
    alternatives:
    - "-f"
    description: "Reference fasta sequence file."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--force"
    description: "Try to proceed even if malformed tags are encountered. \nExperimental,\
      \ use at your own risk.\n"
    info: null
    direction: "input"
  - type: "string"
    name: "--keep_sum"
    description: "Keep vector sum constant when splitting multiallelics (see github\
      \ issue #360).\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Each choice is a "+" (join) or "-" (split) prefix followed by the variant type.
  - type: "string"
    name: "--multiallelics"
    alternatives:
    - "-m"
    description: "Split multiallelics (-) or join biallelics (+), type: snps, indels,\
      \ both, any [default: both].\n"
    info: null
    required: false
    choices:
    - "+snps"
    - "+indels"
    - "+both"
    - "+any"
    - "-snps"
    - "-indels"
    - "-both"
    - "-any"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--no_version"
    description: "Do not append version and command line information to the header."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--do_not_normalize"
    alternatives:
    - "-N"
    description: "Do not normalize indels (with -m or -c s)."
    info: null
    direction: "input"
  # "-O" follows the single-dash short-flag style of the other options in this
  # component; "--O" is kept so existing invocations keep working.
  - type: "string"
    name: "--output_type"
    alternatives:
    - "-O"
    - "--O"
    description: "Output type:\n u: uncompressed BCF\n z: compressed VCF\n b: compressed\
      \ BCF\n v: uncompressed VCF\n"
    info: null
    required: false
    choices:
    - "u"
    - "z"
    - "b"
    - "v"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--old_rec_tag"
    description: "Annotate modified records with INFO/STR indicating the original\
      \ variant."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # "-r" added for consistency with the other short flags; "--r" kept for
  # backward compatibility. The trailing "[,...]" in the help text means further
  # comma-separated regions may follow.
  - type: "string"
    name: "--regions"
    alternatives:
    - "-r"
    - "--r"
    description: "Restrict to comma-separated list of regions. \nFollowing formats\
      \ are supported: chr|chr:pos|chr:beg-end|chr:beg-[,...].\n"
    info: null
    example:
    - "20:1000000-2000000"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # "-R" added for consistency with the other short flags; "--R" kept for
  # backward compatibility.
  - type: "file"
    name: "--regions_file"
    alternatives:
    - "-R"
    - "--R"
    description: "Restrict to regions listed in a file. \nRegions can be specified\
      \ either on a VCF, BED, or tab-delimited file (the default). \nFor more information\
      \ check manual.\n"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Accepts either the symbolic names or their numeric equivalents 0/1/2.
  - type: "string"
    name: "--regions_overlap"
    description: "This option controls how overlapping records are determined: \n\
      set to 'pos' or '0' if the VCF record has to have POS inside a region (this\
      \ corresponds to the default behavior of -t/-T); \nset to 'record' or '1' if\
      \ also overlapping records with POS outside a region should be included (this\
      \ is the default behavior of -r/-R, \nand includes indels with POS at the end\
      \ of a region, which are technically outside the region); \nor set to 'variant'\
      \ or '2' to include only true overlapping variation (compare the full VCF representation\
      \ \"TA>T-\" vs the true sequence variation \"A>-\").\n"
    info: null
    required: false
    choices:
    - "pos"
    - "record"
    - "variant"
    - "0"
    - "1"
    - "2"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--site_win"
    alternatives:
    - "-w"
    description: "Buffer for sorting lines that changed position during realignment.\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--strict_filter"
    alternatives:
    - "-s"
    description: "When merging (-m+), merged site is PASS only if all sites being\
      \ merged PASS."
    info: null
    direction: "input"
  - type: "string"
    name: "--targets"
    alternatives:
    - "-t"
    description: "Similar to --regions but streams rather than index-jumps."
    info: null
    example:
    - "20:1000000-2000000"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--targets_file"
    alternatives:
    - "-T"
    description: "Similar to --regions_file but streams rather than index-jumps."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--targets_overlap"
    description: "Include if POS in the region (0), record overlaps (1), variant overlaps\
      \ (2).\nSimilar to --regions_overlap.\n"
    info: null
    required: false
    choices:
    - "pos"
    - "record"
    - "variant"
    - "0"
    - "1"
    - "2"
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
# Help text shown for the component as a whole.
description: "Left-align and normalize indels, check if REF alleles match the reference,\
  \ split multiallelic sites into multiple rows; \nrecover multiallelics from multiple\
  \ rows. \n"
# Script used to test the component.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
# Commands that must be available at run time.
requirements:
  commands:
  - "ps"
keywords:
- "Normalize"
- "VCF"
- "BCF"
# Licence, citation and project links of the wrapped tool (bcftools).
license: "MIT/Expat, GNU"
references:
  doi:
  - "https://doi.org/10.1093/gigascience/giab008"
links:
  repository: "https://github.com/samtools/bcftools"
  homepage: "https://samtools.github.io/bcftools/"
  documentation: "https://samtools.github.io/bcftools/bcftools.html#norm"
  issue_tracker: "https://github.com/samtools/bcftools/issues"
# Runners: a plain executable runner and a Nextflow runner.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Each label name maps to a resource directive string.
    labels:
      # Memory in decimal units (gb/tb = powers of ten), expressed in bytes.
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (gib/tib = powers of two), expressed in bytes.
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Engines: a Docker image built on debian:stable-slim, plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "debian:stable-slim"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  # Packages installed into the image.
  - type: "apt"
    packages:
    - "bcftools"
    - "procps"
    interactive: false
  # Writes the bcftools version string to /var/software_versions.txt.
  - type: "docker"
    run:
    - "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
      \ //p')\\\"\" > /var/software_versions.txt\n"
  # Extra package listed under test_setup only.
  test_setup:
  - type: "apt"
    packages:
    - "tabix"
    interactive: false
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Provenance of this build: source config, runner/engine, output paths, Viash
# version and the git state it was built from.
build_info:
  config: "src/bcftools/bcftools_norm/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/bcftools/bcftools_norm"
  executable: "target/executable/bcftools/bcftools_norm/bcftools_norm"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the package (biobox) this component belongs to.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  # Config modification expressions recorded for the package.
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,215 @@
# Identity of the component: its name, namespace and the version it was built as.
name: "bcftools_sort"
namespace: "bcftools"
version: "bump_viash_0_9_4"
# People credited for this component, with their roles, contact links and
# affiliation.
authors:
- name: "Theodoro Gasperin Terra Camargo"
  roles:
  - "author"
  - "maintainer"
  info:
    links:
      email: "theodorogtc@gmail.com"
      github: "tgaspe"
      linkedin: "theodoro-gasperin-terra-camargo"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Bioinformatician"
# Command-line arguments of the component, organised into named groups.
argument_groups:
# The single, mandatory input file.
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    alternatives:
    - "-i"
    description: "Input VCF/BCF file."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
# The single, mandatory output file.
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--output"
    alternatives:
    - "-o"
    description: "Output sorted VCF/BCF file."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Optional output format selector, restricted to the four listed codes.
- name: "Options"
  arguments:
  - type: "string"
    name: "--output_type"
    alternatives:
    - "-O"
    description: "Compresses or uncompresses the output.\nThe options are:\n b: compressed\
      \ BCF, \n u: uncompressed BCF, \n z: compressed VCF, \n v: uncompressed VCF.\
      \ \n"
    info: null
    required: false
    choices:
    - "b"
    - "u"
    - "z"
    - "v"
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
# Help text shown for the component as a whole.
description: "Sorts VCF/BCF files.\n"
# Test script plus the test_data path it is shipped with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test_data"
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
# Commands that must be available at run time.
requirements:
  commands:
  - "ps"
keywords:
- "Sort"
- "VCF"
- "BCF"
# Licence, citation and project links of the wrapped tool (bcftools).
license: "MIT/Expat, GNU"
references:
  doi:
  - "https://doi.org/10.1093/gigascience/giab008"
links:
  repository: "https://github.com/samtools/bcftools"
  homepage: "https://samtools.github.io/bcftools/"
  documentation: "https://samtools.github.io/bcftools/bcftools.html#sort"
  issue_tracker: "https://github.com/samtools/bcftools/issues"
# Runners: a plain executable runner and a Nextflow runner.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Each label name maps to a resource directive string.
    labels:
      # Memory in decimal units (gb/tb = powers of ten), expressed in bytes.
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Memory in binary units (gib/tib = powers of two), expressed in bytes.
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU counts.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Engines: a Docker image built on debian:stable-slim, plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "debian:stable-slim"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  # Packages installed into the image.
  - type: "apt"
    packages:
    - "bcftools"
    - "procps"
    interactive: false
  # Writes the bcftools version string to /var/software_versions.txt.
  - type: "docker"
    run:
    - "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
      \ //p')\\\"\" > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Provenance of this build: source config, runner/engine, output paths, Viash
# version and the git state it was built from.
build_info:
  config: "src/bcftools/bcftools_sort/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/bcftools/bcftools_sort"
  executable: "target/executable/bcftools/bcftools_sort/bcftools_sort"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the package (biobox) this component belongs to.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  # Config modification expressions recorded for the package.
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,488 @@
name: "bcftools_stats"
namespace: "bcftools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input VCF/BCF file. Maximum of two files."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output txt statistics file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "string"
name: "--allele_frequency_bins"
alternatives:
- "--af_bins"
description: "Allele frequency bins, a list of bin values (0.1,0.5,1).\n"
info: null
example:
- "0.1,0.5,1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--allele_frequency_bins_file"
alternatives:
- "--af_bins_file"
description: "Same as allele_frequency_bins, but in a file.\nFormat of file is\
\ one value per line. \ne.g. \n 0.1\n 0.5\n 1\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--allele_frequency_tag"
alternatives:
- "--af_tag"
description: "Allele frequency tag to use, by default estimated from AN,AC or\
\ GT.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--first_allele_only"
alternatives:
- "--first_only"
description: "Include only 1st allele at multiallelic sites.\n"
info: null
direction: "input"
- type: "string"
name: "--collapse"
alternatives:
- "--c"
description: "Treat as identical records with <snps|indels|both|all|some|none>.\n\
See https://samtools.github.io/bcftools/bcftools.html#common_options for details.\n"
info: null
required: false
choices:
- "snps"
- "indels"
- "both"
- "all"
- "some"
- "none"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--depth"
alternatives:
- "--d"
description: "Depth distribution: min,max,bin size.\n"
info: null
example:
- "0,500,1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--exclude"
alternatives:
- "--e"
description: "Exclude sites for which the expression is true.\nSee https://samtools.github.io/bcftools/bcftools.html#expressions\
\ for details.\n"
info: null
example:
- "QUAL < 30 && DP < 10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--exons"
alternatives:
- "--E"
description: "tab-delimited file with exons for indel frameshifts statistics.\
\ \nThe columns of the file are CHR, FROM, TO, with 1-based, inclusive, positions.\
\ \nThe file is BGZF-compressed and indexed with tabix (e.g. tabix -s1 -b2 -e3\
\ file.gz).\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--apply_filters"
alternatives:
- "--f"
description: "Require at least one of the listed FILTER strings (e.g. \"PASS,.\"\
).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fasta_reference"
alternatives:
- "--F"
description: "Faidx indexed reference sequence file to determine INDEL context.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--include"
alternatives:
- "--i"
description: "Select sites for which the expression is true.\nSee https://samtools.github.io/bcftools/bcftools.html#expressions\
\ for details.\n"
info: null
example:
- "QUAL >= 30 && DP >= 10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--split_by_ID"
alternatives:
- "--I"
description: "Collect stats for sites with ID separately (known vs novel).\n"
info: null
direction: "input"
- type: "string"
name: "--regions"
alternatives:
- "--r"
description: "Restrict to comma-separated list of regions. \nFollowing formats\
\ are supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​].\n"
info: null
example:
- "20:1000000-2000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--regions_file"
alternatives:
- "--R"
description: "Restrict to regions listed in a file. \nRegions can be specified\
\ either on a VCF, BED, or tab-delimited file (the default). \nFor more information\
\ check manual.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--regions_overlap"
description: "This option controls how overlapping records are determined: \n\
set to 'pos' or '0' if the VCF record has to have POS inside a region (this\
\ corresponds to the default behavior of -t/-T); \nset to 'record' or '1' if\
\ also overlapping records with POS outside a region should be included (this\
\ is the default behavior of -r/-R, \nand includes indels with POS at the end\
\ of a region, which are technically outside the region); \nor set to 'variant'\
\ or '2' to include only true overlapping variation (compare the full VCF representation\
\ \"TA>T-\" vs the true sequence variation \"A>-\").\n"
info: null
required: false
choices:
- "pos"
- "record"
- "variant"
- "0"
- "1"
- "2"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--samples"
alternatives:
- "--s"
description: "List of samples for sample stats, \"-\" to include all samples.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--samples_file"
alternatives:
- "--S"
description: "File of samples to include.\ne.g. \n sample1 1\n sample2 \
\ 2\n sample3 2\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--targets"
alternatives:
- "--t"
description: "Similar as -r, --regions, but the next position is accessed by streaming\
\ the whole VCF/BCF \nrather than using the tbi/csi index. Both -r and -t options\
\ can be applied simultaneously: -r uses the \nindex to jump to a region and\
\ -t discards positions which are not in the targets. Unlike -r, targets \n\
can be prefixed with \"^\" to request logical complement. For example, \"^X,Y,MT\"\
\ indicates that \nsequences X, Y and MT should be skipped. Yet another difference\
\ between the -t/-T and -r/-R is \nthat -r/-R checks for proper overlaps and\
\ considers both POS and the end position of an indel, \nwhile -t/-T considers\
\ the POS coordinate only (by default; see also --regions-overlap and --targets-overlap).\
\ \nNote that -t cannot be used in combination with -T.\nFollowing formats are\
\ supported: chr|chr:pos|chr:beg-end|chr:beg-[,…​].\n"
info: null
example:
- "20:1000000-2000000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--targets_file"
alternatives:
- "--T"
description: "Similar to --regions_file option but streams rather than index-jumps.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--targets_overlaps"
description: "Include if POS in the region (0), record overlaps (1), variant overlaps\
\ (2).\n"
info: null
required: false
choices:
- "pos"
- "record"
- "variant"
- "0"
- "1"
- "2"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--user_tstv"
alternatives:
- "--u"
description: "Collect Ts/Tv stats for any tag using the given binning [0:1:100].\n\
Format is <TAG[:min:max:n]>.\nA subfield can be selected as e.g. 'PV4[0]', here\
\ the first value of the PV4 tag.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
alternatives:
- "--v"
description: "Produce verbose per-site and per-sample output.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Parses VCF or BCF and produces a txt stats file which can be plotted\
\ using plot-vcfstats.\nWhen two files are given, the program generates separate\
\ stats for intersection\nand the complements. By default only sites are compared,\
\ -s/-S must be given to include\nalso sample columns.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Stats"
- "VCF"
- "BCF"
license: "MIT/Expat, GNU"
references:
doi:
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/bcftools"
homepage: "https://samtools.github.io/bcftools/"
documentation: "https://samtools.github.io/bcftools/bcftools.html#stats"
issue_tracker: "https://github.com/samtools/bcftools/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bcftools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
\ //p')\\\"\" > /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "tabix"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bcftools/bcftools_stats/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bcftools/bcftools_stats"
executable: "target/executable/bcftools/bcftools_stats/bcftools_stats"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,458 @@
name: "bcl_convert"
version: "bump_viash_0_9_4"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
- name: "Dorien Roosen"
roles:
- "author"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--bcl_input_directory"
alternatives:
- "-i"
description: "Input run directory"
info: null
example:
- "bcl_dir"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_sheet"
alternatives:
- "-s"
description: "Path to SampleSheet.csv file (default searched for in --bcl_input_directory)"
info: null
example:
- "bcl_dir/sample_sheet.csv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--run_info"
description: "Path to RunInfo.xml file (default root of BCL input directory)"
info: null
example:
- "bcl_dir/RunInfo.xml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Lane and tile settings"
arguments:
- type: "integer"
name: "--bcl_only_lane"
description: "Convert only specified lane number (default all lanes)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--first_tile_only"
description: "Only convert first tile of input (for testing & debugging)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tiles"
description: "Process only a subset of tiles by a regular expression"
info: null
example:
- "s_[0-9]+_1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--exclude_tiles"
description: "Exclude set of tiles by a regular expression"
info: null
example:
- "s_[0-9]+_1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Resource arguments"
arguments:
- type: "boolean"
name: "--shared_thread_odirect_output"
description: "Use linux native asynchronous io (io_submit) for file output (Default=false)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_parallel_tiles"
description: "\\# of tiles to process in parallel (default 1)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_conversion_threads"
description: "\\# of threads for conversion (per tile, default # cpu threads)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_compression_threads"
description: "\\# of threads for fastq.gz output compression (per tile, default\
\ # cpu threads, or HW+12)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_decompression_threads"
description: "\\# of threads for bcl/cbcl input decompression (per tile, default\
\ half # cpu threads, or HW+8). Only applies when preloading files"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Run arguments"
arguments:
- type: "boolean"
name: "--bcl_only_matched_reads"
description: "For pure BCL conversion, do not output files for 'Undetermined'\
\ [unmatched] reads (output by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--no_lane_splitting"
description: "Do not split FASTQ file by lane (false by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_unknown_barcodes_reported"
description: "\\# of Top Unknown Barcodes to output (1000 by default)"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bcl_validate_sample_sheet_only"
description: "Only validate RunInfo.xml & SampleSheet files (produce no FASTQ\
\ files)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--strict_mode"
description: "Abort if any files are missing (false by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--sample_name_column_enabled"
description: "Use sample sheet 'Sample_Name' column when naming fastq files &\
\ subdirectories"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output_directory"
alternatives:
- "-o"
description: "Output directory containing fastq files"
info: null
example:
- "fastq_dir"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bcl_sampleproject_subdirectories"
description: "Output to subdirectories based upon sample sheet 'Sample_Project'\
\ column"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fastq_gzip_compression_level"
description: "Set fastq output compression level 0-9 (default 1)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reports"
description: "Reports directory"
info: null
example:
- "reports_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--logs"
description: "Logs directory"
info: null
example:
- "logs_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--force"
description: "Allow destination directory to already exist and overwrite files.\n"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\
\ upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\n\
and [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "demultiplex"
- "fastq"
- "bcl"
- "illumina"
license: "Proprietary"
links:
repository: "https://github.com/viash-hub/biobox"
homepage: "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html"
documentation: "https://support.illumina.com/downloads/bcl-convert-user-guide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:trixie-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "wget"
- "gdb"
- "which"
- "hostname"
- "alien"
- "procps"
interactive: false
- type: "docker"
run:
- "wget https://s3.amazonaws.com/webdata.illumina.com/downloads/software/bcl-convert/bcl-convert-4.2.7-2.el8.x86_64.rpm\
\ -O /tmp/bcl-convert.rpm && \\\nalien -i /tmp/bcl-convert.rpm && \\\nrm -rf\
\ /var/lib/apt/lists/* && \\\nrm /tmp/bcl-convert.rpm\n"
- type: "docker"
run:
- "echo \"bcl-convert: \\\"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/\
\ s/^bcl-convert\\ Version //p')\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bcl_convert/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bcl_convert"
executable: "target/executable/bcl_convert/bcl_convert"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,304 @@
name: "bd_rhapsody_make_reference"
namespace: "bd_rhapsody"
version: "bump_viash_0_9_4"
authors:
- name: "Robrecht Cannoodt"
roles:
- "author"
- "maintainer"
info:
links:
email: "robrecht@data-intuitive.com"
github: "rcannood"
orcid: "0000-0003-3641-729X"
linkedin: "robrechtcannoodt"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Science Engineer"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Core Member"
- name: "Weiwei Schultz"
roles:
- "contributor"
info:
organizations:
- name: "Janssen R&D US"
role: "Associate Director Data Sciences"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--genome_fasta"
description: "Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody\
\ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse."
info:
config_key: "Genome_fasta"
example:
- "genome_sequence.fa.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "File path to the transcript annotation files in GTF or GTF.GZ format.\
\ The Sequence Analysis Pipeline requires the 'gene_name' or \n'gene_id' attribute\
\ to be set on each gene and exon feature. Gene and exon feature lines must\
\ have the same attribute, and exons\nmust have a corresponding gene with the\
\ same value. For TCR/BCR assays, the TCR or BCR gene segments must have the\
\ 'gene_type' or\n'gene_biotype' attribute set, and the value should begin with\
\ 'TR' or 'IG', respectively.\n"
info:
config_key: "Gtf"
example:
- "transcriptome_annotation.gtf.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--extra_sequences"
description: "File path to additional sequences in FASTA format to use when building\
\ the STAR index. (e.g. transgenes or CRISPR guide barcodes).\nGTF lines for\
\ these sequences will be automatically generated and combined with the main\
\ GTF.\n"
info:
config_key: "Extra_sequences"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--reference_archive"
description: "A compressed archive containing the Reference Genome Index and annotation\
\ GTF files. This archive is meant to be used as an\ninput in the BD Rhapsody\
\ Sequencing Analysis Pipeline.\n"
info: null
example:
- "star_index.tar.gz"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--mitochondrial_contigs"
description: "Names of the Mitochondrial contigs in the provided Reference Genome.\
\ Fragments originating from contigs other than these are\nidentified as 'nuclear\
\ fragments' in the ATACseq analysis pipeline.\n"
info:
config_key: "Mitochondrial_contigs"
default:
- "chrM"
- "chrMT"
- "M"
- "MT"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--filtering_off"
description: "By default the input Transcript Annotation files are filtered based\
\ on the gene_type/gene_biotype attribute. Only features \nhaving the following\
\ attribute values are kept:\n\n - protein_coding\n - lncRNA (lincRNA and\
\ antisense for Gencode < v31/M22/Ensembl97)\n - IG_LV_gene\n - IG_V_gene\n\
\ - IG_V_pseudogene\n - IG_D_gene\n - IG_J_gene\n - IG_J_pseudogene\n -\
\ IG_C_gene\n - IG_C_pseudogene\n - TR_V_gene\n - TR_V_pseudogene\n - TR_D_gene\n\
\ - TR_J_gene\n - TR_J_pseudogene\n - TR_C_gene\n\n If you have already\
\ pre-filtered the input Annotation files and/or wish to turn-off the filtering,\
\ please set this option to True.\n"
info:
config_key: "Filtering_off"
direction: "input"
- type: "boolean_true"
name: "--wta_only_index"
description: "Build a WTA only index, otherwise builds a WTA + ATAC index."
info:
config_key: "Wta_Only"
direction: "input"
- type: "string"
name: "--extra_star_params"
description: "Additional parameters to pass to STAR when building the genome index.\
\ Specify exactly like how you would on the command line."
info:
config_key: "Extra_STAR_params"
example:
- "--limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
description: "The Reference Files Generator creates an archive containing Genome Index\n\
and Transcriptome annotation files needed for the BD Rhapsody Sequencing\nAnalysis\
\ Pipeline. The app takes as input one or more FASTA and GTF files\nand produces\
\ a compressed archive in the form of a tar.gz file. The \narchive contains:\n\n\
- STAR index\n- Filtered GTF file\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "genome"
- "reference"
- "index"
- "align"
license: "Unknown"
links:
repository: "https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/"
documentation: "https://bd-rhapsody-bioinfo-docs.genomics.bd.com/resources/extra_utilities.html#make-rhapsody-reference"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "bdgenomics/rhapsody:2.2.1"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "git"
interactive: false
- type: "python"
user: false
packages:
- "cwlref-runner"
- "cwl-runner"
upgrade: true
- type: "docker"
run:
- "mkdir /var/bd_rhapsody_cwl && \\\n cd /var/bd_rhapsody_cwl && \\\n git clone\
\ https://bitbucket.org/CRSwDev/cwl.git . && \\\n git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de\n"
- type: "docker"
run:
- "VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)"
- "echo \"bdgenomics/rhapsody: \\\"$VERSION\\\"\" > /var/software_versions.txt"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bd_rhapsody/bd_rhapsody_make_reference"
executable: "target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,265 @@
name: "bedtools_bamtobed"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input BAM file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output BED file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--bedpe"
description: "Write BEDPE format. Requires BAM to be grouped or sorted by query.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--mate1"
description: "When writing BEDPE (-bedpe) format, always report mate one as the\
\ first BEDPE \"block\".\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed12"
description: "Write \"blocked\" BED format (aka \"BED12\"). Forces -split.\nSee\
\ http://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "Report \"split\" BAM alignments as separate BED entries.\nSplits\
\ only on N CIGAR operations.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--splitD"
description: "Split alignments based on N and D CIGAR operators.\nForces -split.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--edit_distance"
alternatives:
- "-ed"
description: "Use BAM edit distance (NM tag) for BED score.\n- Default for BED\
\ is to use mapping quality.\n- Default for BEDPE is to use the minimum of\n\
\ the two mapping qualities for the pair.\n- When -ed is used with -bedpe,\
\ the total edit\n distance from the two mates is reported.\n"
info: null
direction: "input"
- type: "string"
name: "--tag"
description: "Use other NUMERIC BAM alignment tag for BED score.\nDefault for\
\ BED is to use mapping quality. Disallowed with BEDPE output.\n"
info: null
example:
- "SM"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--color"
description: "An R,G,B string for the color used with BED12 format.\nDefault is\
\ (255,0,0).\n"
info: null
example:
- "250,250,250"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--cigar"
description: "Add the CIGAR string to the BED entry as a 7th column.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts BAM alignments to BED6 or BEDPE format."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Converts"
- "BAM"
- "BED"
- "BED6"
- "BEDPE"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bamtobed.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bamtobed/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bamtobed"
executable: "target/executable/bedtools/bedtools_bamtobed/bedtools_bamtobed"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,217 @@
name: "bedtools_bamtofastq"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input BAM file to be converted to FASTQ."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--fastq"
alternatives:
- "-fq"
description: "Output FASTQ file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq2"
alternatives:
- "-fq2"
description: "FASTQ for second end. Used if BAM contains paired-end data.\nBAM\
\ should be sorted by query name if creating paired FASTQ.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--tags"
description: "Create FASTQ based on the mate info in the BAM R2 and Q2 tags.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Conversion tool for extracting FASTQ records from sequence alignments\
\ in BAM format.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Conversion"
- "BAM"
- "FASTQ"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bamtofastq.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bamtofastq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bamtofastq"
executable: "target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,206 @@
name: "bedtools_bed12tobed6"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input BED12 file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output BED6 file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--n_score"
alternatives:
- "-n"
description: "Force the score to be the (1-based) block number from the BED12.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts BED features in BED12 (a.k.a. “blocked” BED features such as\
\ genes) to discrete BED6 features.\nFor example, in the case of a gene with six\
\ exons, bed12ToBed6 would create six separate BED6 features (i.e., one for each\
\ exon).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Converts"
- "BED12"
- "BED6"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bed12tobed6.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bed12tobed6/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bed12tobed6"
executable: "target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,244 @@
name: "bedtools_bedtobam"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf)."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Input genome file.\nNOTE: This is not a fasta file. This is a two-column\
\ tab-delimited file\nwhere the first column is the chromosome name and the\
\ second their sizes.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output BAM file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--map_quality"
alternatives:
- "-mapq"
description: "Set the mapping quality for the BAM records.\n"
info: null
default:
- 255
required: false
min: 0
max: 255
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bed12"
description: "The BED file is in BED12 format. The BAM CIGAR\nstring will reflect\
\ BED \"blocks\".\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--uncompress_bam"
alternatives:
- "-ubam"
description: "Write uncompressed BAM output. Default writes compressed BAM.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts feature records (bed/gff/vcf) to BAM format."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Converts"
- "BED"
- "GFF"
- "VCF"
- "BAM"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bedtobam.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "samtools"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bedtobam/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bedtobam"
executable: "target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,367 @@
name: "bedtools_genomecov"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "The input file (BED/GFF/VCF) to be used.\n"
info: null
example:
- "input.bed"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_bam"
alternatives:
- "-ibam"
description: "The input file is in BAM format.\nNote: BAM _must_ be sorted by\
\ positions.\n'--genome' option is ignored if you use '--input_bam' option!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "The genome file to be used.\n"
info: null
example:
- "genome.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--depth"
alternatives:
- "-d"
description: "Report the depth at each genome position (with one-based coordinates).\n\
Default behavior is to report a histogram.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--depth_zero"
alternatives:
- "-dz"
description: "Report the depth at each genome position (with zero-based coordinates).\n\
Reports only non-zero positions.\nDefault behavior is to report a histogram.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed_graph"
alternatives:
- "-bg"
description: "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed_graph_zero_coverage"
alternatives:
- "-bga"
description: "Report depth in BedGraph format, as above (-bg).\nHowever with this\
\ option, regions with zero \ncoverage are also reported. This allows one to\n\
quickly extract all regions of a genome with 0 \ncoverage by applying: \"grep\
\ -w 0$\" to the output.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals\n\
when computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\"\
\ operations \nto infer the blocks for computing coverage.\nFor BED12 files,\
\ this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns\
\ 10,11,12).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_deletion"
alternatives:
- "-ignoreD"
description: "Ignore local deletions (CIGAR \"D\" operations) in BAM entries\n\
when computing coverage.\n"
info: null
direction: "input"
- type: "string"
name: "--strand"
description: "Calculate coverage of intervals from a specific strand.\nWith BED\
\ files, requires at least 6 columns (strand is column 6). \n"
info: null
required: false
choices:
- "+"
- "-"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--pair_end_coverage"
alternatives:
- "-pc"
description: "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--fragment_size"
alternatives:
- "-fs"
description: "Force to use provided fragment size instead of read length\nWorks\
\ for BAM files only\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--du"
description: "Change strand of the mate read (so both reads from the same strand)\
\ useful for strand specific\nWorks for BAM files only\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--five_prime"
alternatives:
- "-5"
description: "Calculate coverage of 5' positions (instead of entire interval).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--three_prime"
alternatives:
- "-3"
description: "Calculate coverage of 3' positions (instead of entire interval).\n"
info: null
direction: "input"
- type: "integer"
name: "--max"
description: "Combine all positions with a depth >= max into\na single bin in\
\ the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--scale"
description: "Scale the coverage by a constant factor.\nEach coverage value is\
\ multiplied by this factor before being reported.\nUseful for normalizing coverage\
\ by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n"
info: null
required: false
min: 0.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--trackline"
description: "Adds a UCSC/Genome-Browser track line definition in the first line\
\ of the output.\n- See here for more details about track line definition:\n\
\ http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding\
\ a trackline definition, the output BedGraph can be easily\n uploaded\
\ to the Genome Browser as a custom track,\n BUT CAN NOT be converted into\
\ a BigWig file (w/o removing the first line).\n"
info: null
direction: "input"
- type: "string"
name: "--trackopts"
description: "Writes additional track line definition parameters in the first\
\ line.\n- Example:\n -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'\n\
\ Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Compute the coverage of a feature file among a genome.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "genome coverage"
- "BED"
- "GFF"
- "VCF"
- "BAM"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_genomecov/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_genomecov"
executable: "target/executable/bedtools/bedtools_genomecov/bedtools_genomecov"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,262 @@
name: "bedtools_getfasta"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_fasta"
description: "FASTA file containing sequences for each interval specified in the\
\ input BED file.\nThe headers in the input FASTA file must exactly match the\
\ chromosome column in the BED file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_bed"
description: "BED file containing intervals to extract from the FASTA file.\n\
BED files containing a single region require a newline character\nat the end\
\ of the line, otherwise a blank output file is produced.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--rna"
description: "The FASTA is RNA not DNA. Reverse complementation handled accordingly.\n"
info: null
direction: "input"
- name: "Run arguments"
arguments:
- type: "boolean_true"
name: "--strandedness"
alternatives:
- "-s"
description: "Force strandedness. If the feature occupies the antisense strand,\
\ the output sequence will\nbe reverse complemented. By default strandedness\
\ is not taken into account.\n"
info: null
direction: "input"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file where the output from the 'bedtools getfasta' command\
\ will\nbe written to.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--tab"
description: "Report extracted sequences in a tab-delimited format instead of in\
\ FASTA format.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed_out"
description: "Report extracted sequences in a tab-delimited BED format instead of\
\ in FASTA format.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--name"
description: "Set the FASTA header for each extracted sequence to be the \"name\"\
\ and coordinate columns from the BED feature.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--name_only"
description: "Set the FASTA header for each extracted sequence to be the \"name\"\
\ column from the BED feature.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "When --input_bed is in BED12 format, create a separate fasta entry for\
\ each block in a BED12 record,\nblocks being described in the 11th and 12th\
\ columns of the BED.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--full_header"
description: "Use full fasta header. By default, only the word before the first\
\ space or tab is used.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Extract sequences from a FASTA file for each of the intervals defined\
\ in a BED/GFF/VCF file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "sequencing"
- "fasta"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_getfasta/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_getfasta"
executable: "target/executable/bedtools/bedtools_getfasta/bedtools_getfasta"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,303 @@
name: "bedtools_groupby"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "The input BED file to be used.\n"
info: null
example:
- "input_a.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output groupby BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "string"
name: "--groupby"
alternatives:
- "-g"
- "-grp"
description: "Specify the columns (1-based) for the grouping.\nThe columns must\
\ be comma separated.\n- Default: 1,2,3 \n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--column"
alternatives:
- "-c"
- "-opCols"
description: "Specify the column (1-based) that should be summarized.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--operation"
alternatives:
- "-o"
- "-ops"
description: "Specify the operation that should be applied to opCol.\nValid operations:\n\
\ sum, count, count_distinct, min, max,\n mean, median, mode, antimode,\n\
\ stdev, sstdev (sample standard dev.),\n collapse (i.e., print a comma\
\ separated list (duplicates allowed)), \n distinct (i.e., print a comma\
\ separated list (NO duplicates allowed)), \n distinct_sort_num (as distinct,\
\ but sorted numerically, ascending), \n distinct_sort_num_desc (as distinct,\
\ but sorted numerically, descending), \n concat (i.e., merge values into\
\ a single, non-delimited string), \n freqdesc (i.e., print desc. list of\
\ values:freq)\n freqasc (i.e., print asc. list of values:freq)\n first\
\ (i.e., print first value)\n last (i.e., print last value)\n\nDefault value:\
\ sum \n\nIf there is only one column, but multiple operations, all operations\
\ will be\napplied on that column. Likewise, if there is only one operation,\
\ but\nmultiple columns, that operation will be applied to all columns.\nOtherwise,\
\ the number of columns must match the number of operations,\nand will be\
\ applied in respective order.\nE.g., \"-c 5,4,6 -o sum,mean,count\" will give\
\ the sum of column 5,\nthe mean of column 4, and the count of column 6.\nThe\
\ order of output columns will match the ordering given in the command.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--full"
description: "Print all columns from input file. The first line in the group is\
\ used.\nDefault: print only grouped columns.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--inheader"
description: "Input file has a header line - the first line will be ignored.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--outheader"
description: "Print header line in the output, detailing the column names. \n\
If the input file has headers (-inheader), the output file\nwill use the input's\
\ column names.\nIf the input file has no headers, the output file\nwill use\
\ \"col_1\", \"col_2\", etc. as the column names.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--header"
description: "same as '-inheader -outheader'."
info: null
direction: "input"
- type: "boolean_true"
name: "--ignorecase"
description: "Group values regardless of upper/lower case.\n"
info: null
direction: "input"
- type: "integer"
name: "--precision"
alternatives:
- "-prec"
description: "Sets the decimal precision for output. \n"
info: null
default:
- 5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--delimiter"
alternatives:
- "-delim"
description: "Specify a custom delimiter for the collapse operations.\n"
info: null
example:
- "|"
default:
- ","
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Summarizes a dataset column based upon common column groupings. \nAkin\
\ to the SQL \"group by\" command.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "groupby"
- "BED"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/groupby.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_groupby/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_groupby"
executable: "target/executable/bedtools/bedtools_groupby/bedtools_groupby"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,440 @@
name: "bedtools_intersect"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input_a"
alternatives:
- "-a"
description: "The input file (BED/GFF/VCF/BAM) to be used as the -a file.\n"
info: null
example:
- "input_a.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_b"
alternatives:
- "-b"
description: "The input file(s) (BED/GFF/VCF/BAM) to be used as the -b file(s).\n"
info: null
example:
- "input_b.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--write_a"
alternatives:
- "-wa"
description: "Write the original A entry for each overlap."
info: null
direction: "input"
- type: "boolean_true"
name: "--write_b"
alternatives:
- "-wb"
description: "Write the original B entry for each overlap. \nUseful for knowing\
\ _what_ A overlaps. Restricted by -f and -r.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--left_outer_join"
alternatives:
- "-loj"
description: "Perform a \"left outer join\". That is, for each feature in A report\
\ each overlap with B. \nIf no overlaps are found, report a NULL feature for\
\ B.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--write_overlap"
alternatives:
- "-wo"
description: "Write the original A and B entries plus the number of base pairs\
\ of overlap between the two features.\n- Overlaps restricted by -f and -r.\
\ \n Only A features with overlap are reported.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--write_overlap_plus"
alternatives:
- "-wao"
description: "Write the original A and B entries plus the number of base pairs\
\ of overlap between the two features.\n- Overlaps restricted by -f and -r.\
\ \n However, A features w/o overlap are also reported with a NULL B feature\
\ and overlap = 0.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--report_A_if_no_overlap"
alternatives:
- "-u"
description: "Write the original A entry _once_ if _any_ overlaps are found in B. \n- In other\
\ words, just report the fact >=1 hit was found.\n- Overlaps restricted by -f\
\ and -r. \n"
info: null
direction: "input"
- type: "boolean_true"
name: "--number_of_overlaps_A"
alternatives:
- "-c"
description: "For each entry in A, report the number of overlaps with B.\n- Reports\
\ 0 for A entries that have no overlap with B.\n- Overlaps restricted by -f\
\ and -r.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--report_no_overlaps_A"
alternatives:
- "-v"
description: "Only report those entries in A that have _no overlaps_ with B.\n\
- Similar to \"grep -v\" (an homage).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--uncompressed_bam"
alternatives:
- "-ubam"
description: "Write uncompressed BAM output. Default writes compressed BAM."
info: null
direction: "input"
- type: "boolean_true"
name: "--same_strand"
alternatives:
- "-s"
description: "Require same strandedness. That is, only report hits in B\nthat\
\ overlap A on the _same_ strand.\n- By default, overlaps are reported without\
\ respect to strand.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--opposite_strand"
alternatives:
- "-S"
description: "Require different strandedness. That is, only report hits in B\n\
that overlap A on the _opposite_ strand.\n- By default, overlaps are reported\
\ without respect to strand.\n"
info: null
direction: "input"
- type: "double"
name: "--min_overlap_A"
alternatives:
- "-f"
description: "Minimum overlap required as a fraction of A.\n- Default is 1E-9\
\ (i.e., 1bp).\n- FLOAT (e.g. 0.50)\n"
info: null
example:
- 0.5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--min_overlap_B"
alternatives:
- "-F"
description: "Minimum overlap required as a fraction of B.\n- Default is 1E-9\
\ (i.e., 1bp).\n- FLOAT (e.g. 0.50)\n"
info: null
example:
- 0.5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--reciprocal_overlap"
alternatives:
- "-r"
description: "Require that the fraction overlap be reciprocal for A AND B.\n-\
\ In other words, if -f is 0.90 and -r is used, this requires\nthat B overlap\
\ 90% of A and A _also_ overlaps 90% of B.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--either_overlap"
alternatives:
- "-e"
description: "Require that the minimum fraction be satisfied for A OR B.\n- In\
\ other words, if -e is used with -f 0.90 and -F 0.10 this requires\nthat either\
\ 90% of A is covered OR 10% of B is covered.\nWithout -e, both fractions would\
\ have to be satisfied.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals."
info: null
direction: "input"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Provide a genome file to enforce consistent chromosome \nsort order\
\ across input files. Only applies when used \nwith -sorted option.\n"
info: null
example:
- "genome.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--nonamecheck"
description: "For sorted data, don't throw an error if the file \nhas different\
\ naming conventions for the same chromosome \n(e.g., \"chr1\" vs \"chr01\"\
).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--sorted"
description: "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input.\n"
info: null
direction: "input"
- type: "string"
name: "--names"
description: "When using multiple databases, provide an alias \nfor each that\
\ will appear instead of a fileId when \nalso printing the DB record.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--filenames"
description: "When using multiple databases, show each complete filename instead\
\ of a fileId when also printing the DB record."
info: null
direction: "input"
- type: "boolean_true"
name: "--sortout"
description: "When using multiple databases, sort the output DB hits for each\
\ record."
info: null
direction: "input"
- type: "boolean_true"
name: "--bed"
description: "If using BAM input, write output as BED."
info: null
direction: "input"
- type: "boolean_true"
name: "--header"
description: "Print the header from the A file prior to results."
info: null
direction: "input"
- type: "boolean_true"
name: "--no_buffer_output"
alternatives:
- "--nobuf"
description: "Disable buffered output. Using this option will cause each line\n\
of output to be printed as it is generated, rather than saved\nin a buffer.\
\ This will make printing large output files \nnoticeably slower, but can be\
\ useful in conjunction with\nother software tools and scripts that need to\
\ process one\nline of bedtools output at a time.\n"
info: null
direction: "input"
- type: "integer"
name: "--io_buffer_size"
alternatives:
- "--iobuf"
description: "Specify amount of memory to use for input buffer.\nTakes an integer\
\ argument. Optional suffixes K/M/G supported.\nNote: currently has no effect\
\ with compressed files. \n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "bedtools intersect allows one to screen for overlaps between two sets\
\ of genomic features. \nMoreover, it allows one to have fine control as to how\
\ the intersections are reported. \nbedtools intersect works with both BED/GFF/VCF\
\ and BAM files as input.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "feature intersection"
- "BAM"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0, MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_intersect/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_intersect"
executable: "target/executable/bedtools/bedtools_intersect/bedtools_intersect"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,240 @@
name: "bedtools_links"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf)."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output HTML file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
description: "By default, the links created will point to human (hg18) UCSC browser.\n\
If you have a local mirror, you can override this behavior by supplying\nthe -base,\
\ -org, and -db options.\n\nFor example, if the URL of your local mirror for mouse\
\ MM9 is called: \nhttp://mymirror.myuniversity.edu, then you would use the following:\n\
--base_url http://mymirror.myuniversity.edu\n--organism mouse\n--database mm9\n"
arguments:
- type: "string"
name: "--base_url"
alternatives:
- "-base"
description: "The “basename” for the UCSC browser.\n"
info: null
default:
- "http://genome.ucsc.edu"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--organism"
alternatives:
- "-org"
description: "The organism (e.g. mouse, human). \n"
info: null
default:
- "human"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--database"
alternatives:
- "-db"
description: "The genome build. \n"
info: null
default:
- "hg18"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Creates an HTML file with links to an instance of the UCSC Genome Browser\
\ for all features / intervals in a file. \nThis is useful for cases when one wants\
\ to manually inspect through a large set of annotations or features.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Links"
- "BED"
- "GFF"
- "VCF"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/links.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_links/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_links"
executable: "target/executable/bedtools/bedtools_links/bedtools_links"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,309 @@
name: "bedtools_merge"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (BED/GFF/VCF) to be merged."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Output merged file BED to be written."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--strand"
alternatives:
- "-s"
description: "Force strandedness. That is, only merge features\nthat are on the\
\ same strand.\n- By default, merging is done without respect to strand.\n"
info: null
direction: "input"
- type: "string"
name: "--specific_strand"
alternatives:
- "-S"
description: "Force merge for one specific strand only.\nFollow with + or - to\
\ force merge from only\nthe forward or reverse strand, respectively.\n- By\
\ default, merging is done without respect to strand.\n"
info: null
required: false
choices:
- "+"
- "-"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--distance"
alternatives:
- "-d"
description: "Maximum distance between features allowed for features\nto be merged.\n\
- Def. 0. That is, overlapping & book-ended features are merged.\n- (INTEGER)\n\
- Note: negative values enforce the number of b.p. required for overlap.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--columns"
alternatives:
- "-c"
description: "Specify columns from the input file to operate upon (see -o option).\nDefault:\
\ 5.\nMultiple columns can be specified in a comma-delimited list.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--operation"
alternatives:
- "-o"
description: "Specify the operation that should be applied to -c.\nValid operations:\n\
\ sum, min, max, absmin, absmax,\n mean, median, mode, antimode\n stdev,\
\ sstdev\n collapse (i.e., print a delimited list (duplicates allowed)),\
\ \n distinct (i.e., print a delimited list (NO duplicates allowed)), \n\
\ distinct_sort_num (as distinct, sorted numerically, ascending),\n distinct_sort_num_desc\
\ (as distinct, sorted numerically, descending),\n distinct_only (delimited\
\ list of only unique values),\n count\n count_distinct (i.e., a count\
\ of the unique values in the column), \n first (i.e., just the first value\
\ in the column), \n last (i.e., just the last value in the column), \nDefault:\
\ sum\nMultiple operations can be specified in a comma-delimited list.\n\nIf\
\ there is only one column, but multiple operations, all operations will be\napplied\
\ on that column. Likewise, if there is only one operation, but\nmultiple columns,\
\ that operation will be applied to all columns.\nOtherwise, the number of columns\
\ must match the number of operations,\nand will be applied in respective\
\ order.\nE.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5,\n\
the mean of column 4, and the count of column 6.\nThe order of output columns\
\ will match the ordering given in the command.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--delimiter"
alternatives:
- "-delim"
description: "Specify a custom delimiter for the collapse operations.\n"
info: null
example:
- "|"
default:
- ","
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--precision"
alternatives:
- "-prec"
description: "Sets the decimal precision for output (Default: 5).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bed"
description: "If using BAM input, write output as BED.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--header"
description: "Print the header from the A file prior to results.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_buffer"
alternatives:
- "-nobuf"
description: "Disable buffered output. Using this option will cause each line\n\
of output to be printed as it is generated, rather than saved\nin a buffer.\
\ This will make printing large output files \nnoticeably slower, but can be\
\ useful in conjunction with\nother software tools and scripts that need to\
\ process one\nline of bedtools output at a time.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Merges overlapping BED/GFF/VCF entries into a single interval.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/merge.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_merge/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_merge"
executable: "target/executable/bedtools/bedtools_merge/bedtools_merge"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,252 @@
name: "bedtools_sort"
namespace: "bedtools"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf) to be sorted."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output sorted file (bed/gff/vcf) to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--sizeA"
description: "Sort by feature size in ascending order."
info: null
direction: "input"
- type: "boolean_true"
name: "--sizeD"
description: "Sort by feature size in descending order."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenSizeA"
description: "Sort by chrom (asc), then feature size (asc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenSizeD"
description: "Sort by chrom (asc), then feature size (desc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenScoreA"
description: "Sort by chrom (asc), then score (asc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenScoreD"
description: "Sort by chrom (asc), then score (desc)."
info: null
direction: "input"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Sort according to the chromosomes declared in \"genome.txt\""
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--faidx"
description: "Sort according to the chromosomes declared in \"names.txt\""
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--header"
description: "Print the header from the input file prior to results."
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Sorts a feature file (bed/gff/vcf) by chromosome and other criteria."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "sort"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0, MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/sort.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_sort/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_sort"
executable: "target/executable/bedtools/bedtools_sort/bedtools_sort"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,188 @@
name: "busco_download_datasets"
namespace: "busco"
version: "bump_viash_0_9_4"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "string"
name: "--download"
description: "Download dataset. Possible values are a specific dataset name, \"\
all\", \"prokaryota\", \"eukaryota\", or \"virus\".\nThe full list of available\
\ datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/)\
\ or by running the busco/busco_list_datasets component.\n"
info: null
example:
- "stramenopiles_odb10"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--download_path"
description: "Local filepath for storing BUSCO dataset downloads\n"
info: null
example:
- "busco_downloads"
default:
- "busco_downloads"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Downloads available busco datasets"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "lineage datasets"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_download_datasets/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_download_datasets"
executable: "target/executable/busco/busco_download_datasets/busco_download_datasets"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,175 @@
name: "busco_list_datasets"
namespace: "busco"
version: "bump_viash_0_9_4"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file of the available busco datasets\n"
info: null
example:
- "file.txt"
default:
- "busco_dataset_list.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Lists the available busco datasets"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "lineage datasets"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_list_datasets/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_list_datasets"
executable: "target/executable/busco/busco_list_datasets/busco_list_datasets"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,453 @@
name: "busco_run"
namespace: "busco"
version: "bump_viash_0_9_4"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input sequence file in FASTA format. Can be an assembled genome\
\ or transcriptome (DNA), or protein sequences from an annotated gene set. Also\
\ possible to use a path to a directory containing multiple input files.\n"
info: null
example:
- "file.fasta"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mode"
alternatives:
- "-m"
description: "Specify which BUSCO analysis mode to run. There are three valid\
\ modes:\n - geno or genome, for genome assemblies (DNA)\n - tran or transcriptome,\
\ for transcriptome assemblies (DNA)\n - prot or proteins, for annotated gene\
\ sets (protein)\n"
info: null
example:
- "proteins"
required: true
choices:
- "genome"
- "geno"
- "transcriptome"
- "tran"
- "proteins"
- "prot"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--lineage_dataset"
alternatives:
- "-l"
description: "Specify a BUSCO lineage dataset that is most closely related to\
\ the assembly or gene set being assessed. \nThe full list of available datasets\
\ can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by\
\ running the busco/busco_list_datasets component.\nWhen unsure, the \"--auto_lineage\"\
\ flag can be set to automatically find the optimal lineage path.\nBUSCO will\
\ automatically download the requested dataset if it is not already present\
\ in the download folder. \nYou can optionally provide a path to a local dataset\
\ instead of a name, e.g. path/to/dataset.\nDatasets can be downloaded using\
\ the busco/busco_download_datasets component.\n"
info: null
example:
- "stramenopiles_odb10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--short_summary_json"
description: "Output file for short summary in JSON format.\n"
info: null
example:
- "short_summary.json"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--short_summary_txt"
description: "Output file for short summary in TXT format.\n"
info: null
example:
- "short_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--full_table"
description: "Full table output in TSV format.\n"
info: null
example:
- "full_table.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--missing_busco_list"
description: "Missing list output in TSV format.\n"
info: null
example:
- "missing_busco_list.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_dir"
description: "The full output directory, if so desired.\n"
info: null
example:
- "output_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Resource and Run Settings"
arguments:
- type: "boolean_true"
name: "--force"
description: "Force rewriting of existing files. Must be used when output files\
\ with the provided name already exist.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Disable the info logs, displays only errors.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--restart"
alternatives:
- "-r"
description: "Continue a run that had already partially completed. Restarting\
\ skips calls to tools that have completed but performs all pre- and post-processing\
\ steps.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--tar"
description: "Compress some subdirectories with many files to save space.\n"
info: null
direction: "input"
- name: "Lineage Dataset Settings"
arguments:
- type: "boolean_true"
name: "--auto_lineage"
description: "Run auto-lineage pipeline to automatically determine BUSCO lineage\
\ dataset that is most closely related to the assembly or gene set being assessed.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--auto_lineage_euk"
description: "Run auto-placement just on eukaryota tree to find optimal lineage\
\ path.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--auto_lineage_prok"
description: "Run auto_lineage just on prokaryota trees to find optimum lineage\
\ path.\n"
info: null
direction: "input"
- type: "string"
name: "--datasets_version"
description: "Specify the version of BUSCO datasets\n"
info: null
example:
- "odb10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Augustus Settings"
arguments:
- type: "boolean_true"
name: "--augustus"
description: "Use augustus gene predictor for eukaryote runs.\n"
info: null
direction: "input"
- type: "string"
name: "--augustus_parameters"
description: "Additional parameters to be passed to Augustus (see Augustus documentation:\
\ https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md).\n\
Parameters should be contained within a single string, without whitespace and\
\ separated by commas.\n"
info: null
example:
- "--PARAM1=VALUE1,--PARAM2=VALUE2"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--augustus_species"
description: "Specify the augustus species\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--long"
description: "Optimize Augustus self-training mode. This adds considerably to\
\ the run time, but can improve results for some non-model organisms.\n"
info: null
direction: "input"
- name: "BBTools Settings"
arguments:
- type: "integer"
name: "--contig_break"
description: "Number of contiguous Ns to signify a break between contigs in BBTools\
\ analysis.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--limit"
description: "Number of candidate regions (contig or transcript) from the BLAST\
\ output to consider per BUSCO.\nThis option is only effective in pipelines\
\ using BLAST, i.e. the genome pipeline (see --augustus) or the prokaryota transcriptome\
\ pipeline.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--scaffold_composition"
description: "Writes ACGTN content per scaffold to a file scaffold_composition.txt.\n"
info: null
direction: "input"
- name: "BLAST Settings"
arguments:
- type: "double"
name: "--e_value"
description: "E-value cutoff for BLAST searches.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Protein Gene Prediction settings"
arguments:
- type: "boolean_true"
name: "--miniprot"
description: "Use Miniprot gene predictor.\n"
info: null
direction: "input"
- name: "MetaEuk Settings"
arguments:
- type: "boolean_true"
name: "--metaeuk"
description: "Use Metaeuk gene predictor.\n"
info: null
direction: "input"
- type: "string"
name: "--metaeuk_parameters"
description: "Pass additional arguments to Metaeuk for the first run (see Metaeuk\
\ documentation https://github.com/soedinglab/metaeuk).\nAll parameters should\
\ be contained within a single string with no white space, with each parameter\
\ separated by a comma.\n"
info: null
example:
- "--max-overlap=15,--min-exon-aa=15"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--metaeuk_rerun_parameters"
description: "Pass additional arguments to Metaeuk for the second run (see Metaeuk\
\ documentation https://github.com/soedinglab/metaeuk).\nAll parameters should\
\ be contained within a single string with no white space, with each parameter\
\ separated by a comma.\n"
info: null
example:
- "--max-overlap=15,--min-exon-aa=15"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Assessment of genome assembly and annotation completeness with single\
\ copy orthologs"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Genome assembly"
- "quality control"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_run/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_run"
executable: "target/executable/busco/busco_run/busco_run"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,225 @@
name: "cellranger_mkref"
namespace: "cellranger"
version: "bump_viash_0_9_4"
authors:
- name: "Emma Rousseau"
roles:
- "author"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--genome_fasta"
description: "Reference genome fasta."
info: null
example:
- "genome_sequence.fa.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcriptome_gtf"
description: "Reference transcriptome annotation."
info: null
example:
- "transcriptome_annotation.gtf.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--reference_version"
description: "Optional reference version string to include with reference"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Output folder"
info: null
example:
- "cellranger_reference"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Build a Cell Ranger-compatible reference folder from user-supplied genome\
\ FASTA and gene GTF files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "cellranger"
- "single-cell"
- "rna-seq"
- "alignment"
- "reference"
- "gtf"
- "fasta"
license: "Proprietary"
references:
doi:
- "10.1038/ncomms14049"
links:
repository: "https://github.com/10XGenomics/cellranger/blob/main/lib/python/cellranger/reference_builder.py"
homepage: "https://www.10xgenomics.com/support/software/cell-ranger/latest"
documentation: "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/references"
issue_tracker: "https://github.com/10XGenomics/cellranger/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
label:
- "highmem"
- "highcpu"
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ghcr.io/data-intuitive/cellranger:8.0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "pigz"
interactive: false
test_setup:
- type: "apt"
packages:
- "seqkit"
interactive: false
- type: "docker"
run:
- "cellranger --version | sed 's/ cellranger-/: /' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cellranger/cellranger_mkref/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/cellranger/cellranger_mkref"
executable: "target/executable/cellranger/cellranger_mkref/cellranger_mkref"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,770 @@
name: "cutadapt"
version: "bump_viash_0_9_4"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
argument_groups:
- name: "Specify Adapters for R1"
arguments:
- type: "string"
name: "--adapter"
alternatives:
- "-a"
description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\
\ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\
\ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\
\ the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--front"
alternatives:
- "-g"
description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\
\ first read). The adapter and any preceding bases\nare trimmed. Partial matches\
\ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\
\ the adapter is\nonly found if it is a prefix of the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--anywhere"
alternatives:
- "-b"
description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\
\ (paired data: of the first read). Both types of\nmatches as described under\
\ -a and -g are allowed. If the\nfirst base of the read is part of the match,\
\ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\
for rescuing failed library preparations - do not use if\nyou know which end\
\ your adapter was ligated to!\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Specify Adapters using Fasta files for R1"
arguments:
- type: "file"
name: "--adapter_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 3'\
\ end (paired data:\nof the first read). The adapter and subsequent bases are\n\
trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\
\ found if it is a suffix of the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--front_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 5'\
\ end (paired data:\nof the first read). The adapter and any preceding bases\n\
are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\
\ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\
\ the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--anywhere_fasta"
description: "Fasta file containing sequences of an adapter that may be ligated\
\ to the 5' or 3'\nend (paired data: of the first read). Both types of\nmatches\
\ as described under -a and -g are allowed. If the\nfirst base of the read is\
\ part of the match, the behavior\nis as with -g, otherwise as with -a. This\
\ option is mostly\nfor rescuing failed library preparations - do not use if\n\
you know which end your adapter was ligated to!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Specify Adapters for R2"
arguments:
- type: "string"
name: "--adapter_r2"
alternatives:
- "-A"
description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\
\ second read). The adapter and subsequent bases are\ntrimmed. If a '$' character\
\ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\
\ the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--front_r2"
alternatives:
- "-G"
description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\
\ second read). The adapter and any preceding bases\nare trimmed. Partial matches\
\ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\
\ the adapter is\nonly found if it is a prefix of the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--anywhere_r2"
alternatives:
- "-B"
description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\
\ (paired data: of the second read). Both types of\nmatches as described under\
\ -a and -g are allowed. If the\nfirst base of the read is part of the match,\
\ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\
for rescuing failed library preparations - do not use if\nyou know which end\
\ your adapter was ligated to!\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Specify Adapters using Fasta files for R2"
arguments:
- type: "file"
name: "--adapter_r2_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 3'\
\ end (paired data:\nof the second read). The adapter and subsequent bases are\n\
trimmed. If a '$' character is appended ('anchoring'), the\nadapter is only\
\ found if it is a suffix of the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--front_r2_fasta"
description: "Fasta file containing sequences of an adapter ligated to the 5'\
\ end (paired data:\nof the second read). The adapter and any preceding bases\n\
are trimmed. Partial matches at the 5' end are allowed. If\na '^' character\
\ is prepended ('anchoring'), the adapter is\nonly found if it is a prefix of\
\ the read.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--anywhere_r2_fasta"
description: "Fasta file containing sequences of an adapter that may be ligated\
\ to the 5' or 3'\nend (paired data: of the second read). Both types of\nmatches\
\ as described under -a and -g are allowed. If the\nfirst base of the read is\
\ part of the match, the behavior\nis as with -g, otherwise as with -a. This\
\ option is mostly\nfor rescuing failed library preparations - do not use if\n\
you know which end your adapter was ligated to!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Paired-end options"
arguments:
- type: "boolean_true"
name: "--pair_adapters"
description: "Treat adapters given with -a/-A etc. as pairs. Either both\nor none\
\ are removed from each read pair.\n"
info: null
direction: "input"
- type: "string"
name: "--pair_filter"
description: "Which of the reads in a paired-end read have to match the\nfiltering\
\ criterion in order for the pair to be filtered.\n"
info: null
required: false
choices:
- "any"
- "both"
- "first"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--interleaved"
description: "Read and/or write interleaved paired-end reads.\n"
info: null
direction: "input"
- name: "Input parameters"
arguments:
- type: "file"
name: "--input"
description: "Input fastq file for single-end reads or R1 for paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Input fastq file for R2 in the case of paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--error_rate"
alternatives:
- "-E"
- "--errors"
description: "Maximum allowed error rate (if 0 <= E < 1), or absolute\nnumber\
\ of errors for full-length adapter match (if E is an\ninteger >= 1). Error\
\ rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_indels"
description: "Allow only mismatches in alignments.\n"
info: null
direction: "input"
- type: "integer"
name: "--times"
alternatives:
- "-n"
description: "Remove up to COUNT adapters from each read. Default: 1.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--overlap"
alternatives:
- "-O"
description: "Require MINLENGTH overlap between read and adapter for an\nadapter\
\ to be found. The default is 3.\n"
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--match_read_wildcards"
description: "Interpret IUPAC wildcards in reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_match_adapter_wildcards"
description: "Do not interpret IUPAC wildcards in adapters.\n"
info: null
direction: "input"
- type: "string"
name: "--action"
description: "What to do if a match was found. trim: trim adapter and\nup- or\
\ downstream sequence; retain: trim, but retain\nadapter; mask: replace with\
\ 'N' characters; lowercase:\nconvert to lowercase; none: leave unchanged.\n\
The default is trim.\n"
info: null
example:
- "trim"
required: false
choices:
- "trim"
- "retain"
- "mask"
- "lowercase"
- "none"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--revcomp"
alternatives:
- "--rc"
description: "Check both the read and its reverse complement for adapter\nmatches.\
\ If match is on reverse-complemented version,\noutput that one.\n"
info: null
direction: "input"
- name: "Demultiplexing options"
arguments:
- type: "string"
name: "--demultiplex_mode"
description: "Enable demultiplexing and set the mode for it.\nWith mode 'unique_dual',\
\ adapters from the first and second read are used,\nand the indexes from the\
\ reads are only used in pairs. This implies\n--pair_adapters.\nEnabling mode\
\ 'combinatorial_dual' allows all combinations of the sets of indexes\non R1\
\ and R2. It is necessary to write each read pair to an output\nfile depending\
\ on the adapters found on both R1 and R2.\nMode 'single' uses indexes or barcodes\
\ located at the 5'\nend of the R1 read (single).\n"
info: null
required: false
choices:
- "single"
- "unique_dual"
- "combinatorial_dual"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Read modifications"
arguments:
- type: "integer"
name: "--cut"
alternatives:
- "-u"
description: "Remove LEN bases from each read (or R1 if paired; use --cut_r2\n\
option for R2). If LEN is positive, remove bases from the\nbeginning. If LEN\
\ is negative, remove bases from the end.\nCan be used twice if LENs have different\
\ signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "integer"
name: "--cut_r2"
description: "Remove LEN bases from each read (for R2). If LEN is positive, remove\
\ bases from the\nbeginning. If LEN is negative, remove bases from the end.\n\
Can be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--nextseq_trim"
description: "NextSeq-specific quality trimming (each read). Trims also\ndark\
\ cycles appearing as high-quality G bases.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff"
alternatives:
- "-q"
description: "Trim low-quality bases from 5' and/or 3' ends of each read\nbefore\
\ adapter removal. Applied to both reads if data is\npaired. If one value is\
\ given, only the 3' end is trimmed.\nIf two comma-separated cutoffs are given,\
\ the 5' end is\ntrimmed with the first cutoff, the 3' end with the second.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff_r2"
alternatives:
- "-Q"
description: "Quality-trimming cutoff for R2. Default: same as for R1\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--quality_base"
description: "Assume that quality values in FASTQ are encoded as\nascii(quality\
\ + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default\
\ is 33.\n"
info: null
example:
- 33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--poly_a"
description: "Trim poly-A tails"
info: null
direction: "input"
- type: "integer"
name: "--length"
alternatives:
- "-l"
description: "Shorten reads to LENGTH. Positive values remove bases at\nthe end\
\ while negative ones remove bases at the beginning.\nThis and the following\
\ modifications are applied after\nadapter trimming.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--trim_n"
description: "Trim N's on ends of reads."
info: null
direction: "input"
- type: "string"
name: "--length_tag"
description: "Search for TAG followed by a decimal number in the\ndescription\
\ field of the read. Replace the decimal number\nwith the correct length of\
\ the trimmed read. For example,\nuse --length-tag 'length=' to correct fields\
\ like\n'length=123'.\n"
info: null
example:
- "length="
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strip_suffix"
description: "Remove this suffix from read names if present. Can be\ngiven multiple\
\ times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--prefix"
alternatives:
- "-x"
description: "Add this prefix to read names. Use {name} to insert the\nname of\
\ the matching adapter.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--suffix"
alternatives:
- "-y"
description: "Add this suffix to read names; can also include {name}\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--rename"
description: "Rename reads using TEMPLATE containing variables such as\n{id},\
\ {adapter_name} etc. (see documentation)\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--zero_cap"
alternatives:
- "-z"
description: "Change negative quality values to zero."
info: null
direction: "input"
- name: "Filtering of processed reads"
description: "Filters are applied after above read modifications. Paired-end reads\
\ are\nalways discarded pairwise (see also --pair_filter).\n"
arguments:
- type: "string"
name: "--minimum_length"
alternatives:
- "-m"
description: "Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end\
\ reads, the minimum lengths for R1 and R2 can be specified separately by separating\
\ them with a colon (:).\nIf the colon syntax is not used, the same minimum\
\ length applies to both reads, as discussed above.\nAlso, one of the values\
\ can be omitted to impose no restrictions.\nFor example, with -m 17:, the length\
\ of R1 must be at least 17, but the length of R2 is ignored.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--maximum_length"
alternatives:
- "-M"
description: "Discard reads longer than LEN. Default: no limit.\nFor paired reads,\
\ see the remark for --minimum_length\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--max_n"
description: "Discard reads with more than COUNT 'N' bases. If COUNT is\na number\
\ between 0 and 1, it is interpreted as a fraction\nof the read length.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "long"
name: "--max_expected_errors"
alternatives:
- "--max_ee"
description: "Discard reads whose expected number of errors (computed\nfrom quality\
\ values) exceeds ERRORS.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "long"
name: "--max_average_error_rate"
alternatives:
- "--max_aer"
description: "as --max_expected_errors (see above), but divided by\nlength to\
\ account for reads of varying length.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--discard_trimmed"
alternatives:
- "--discard"
description: "Discard reads that contain an adapter. Use also -O to\navoid discarding\
\ too many randomly matching reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_untrimmed"
alternatives:
- "--trimmed_only"
description: "Discard reads that do not contain an adapter.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_casava"
description: "Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n"
info: null
direction: "input"
- name: "Output parameters"
arguments:
- type: "string"
name: "--report"
description: "Which type of report to print: 'full' (default) or 'minimal'.\n"
info: null
example:
- "full"
required: false
choices:
- "full"
- "minimal"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--json"
description: "Write report in JSON format to this file.\n"
info: null
direction: "input"
- type: "file"
name: "--output"
description: "Glob pattern for matching the expected output files.\nShould include\
\ `$output_dir`.\n"
info: null
example:
- "fastq/*_001.fast[a,q]"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--fasta"
description: "Output FASTA to standard output even on FASTQ input.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--info_file"
description: "Write information about each read and its adapter matches\ninto\
\ info.txt in the output directory.\nSee the documentation for the file format.\n"
info: null
direction: "input"
- name: "Debug"
arguments:
- type: "boolean_true"
name: "--debug"
description: "Print debug information"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "RNA-seq"
- "scRNA-seq"
- "high-throughput"
license: "MIT"
references:
doi:
- "10.14806/ej.17.1.200"
links:
repository: "https://github.com/marcelm/cutadapt"
homepage: "https://cutadapt.readthedocs.io"
documentation: "https://cutadapt.readthedocs.io"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "python"
user: false
pip:
- "cutadapt"
upgrade: true
- type: "docker"
run:
- "cutadapt --version | sed 's/\\(.*\\)/cutadapt: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cutadapt/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/cutadapt"
executable: "target/executable/cutadapt/cutadapt"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,347 @@
name: "falco"
version: "bump_viash_0_9_4"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input"
description: "input fastq files"
info: null
example:
- "input1.fastq;input2.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Run arguments"
arguments:
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in \nthe read. WARNING: When using this option, \nyour\
\ plots may end up a ridiculous size. You \nhave been warned!\n"
info: null
direction: "input"
- type: "file"
name: "--contaminents"
description: "Specifies a non-default file which contains \nthe list of contaminants\
\ to screen \noverrepresented sequences against. The file \nmust contain sets\
\ of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith\
\ a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--adapters"
description: "Specifies a non-default file which contains \nthe list of adapter\
\ sequences which will be \nexplicitly searched against the library. The \nfile\
\ must contain sets of named adapters in \nthe form name[tab]sequence. Lines\
\ prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--limits"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to \ndetermine the warn/error limits for the \nvarious\
\ modules. This file can also be used \nto selectively remove some modules from\
\ the \noutput all together. The format needs to \nmirror the default limits.txt\
\ file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--subsample"
alternatives:
- "-s"
description: "[Falco only] makes falco faster (but \npossibly less accurate) by\
\ only processing \nreads that are a multiple of this value (using \n0-based\
\ indexing to number reads).\n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bisulfite"
alternatives:
- "-b"
description: "[Falco only] reads are whole genome \nbisulfite sequencing, and\
\ more Ts and fewer \nCs are therefore expected and will be \naccounted for\
\ in base content.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--reverse_complement"
alternatives:
- "-r"
description: "[Falco only] The input is a \nreverse-complement. All modules will\
\ be \ntested by swapping A/T and C/G\n"
info: null
direction: "input"
- name: "Output arguments"
arguments:
- type: "file"
name: "--outdir"
alternatives:
- "-o"
description: "Create all output files in the specified \noutput directory. FALCO-SPECIFIC:\
\ If the \ndirectory does not exist, the program will \ncreate it.\n"
info: null
example:
- "output"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format \ndetection and forces\
\ the program to use the \nspecified format. Valid formats are bam, sam, \nbam_mapped,\
\ sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n"
info: null
required: false
choices:
- "bam"
- "sam"
- "bam_mapped"
- "sam_mapped"
- "fastq"
- "fq"
- "fastq.gz"
- "fq.gz"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--data_filename"
alternatives:
- "-D"
description: "[Falco only] Specify filename for FastQC \ndata output (TXT). If\
\ not specified, it will \nbe called fastqc_data.txt in either the input \nfile's\
\ directory or the one specified in the \n--outdir flag. Only available when\
\ running \nfalco with a single input.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--report_filename"
alternatives:
- "-R"
description: "[Falco only] Specify filename for FastQC \nreport output (HTML).\
\ If not specified, it \nwill be called fastqc_report.html in either \nthe input\
\ file's directory or the one \nspecified in the --outdir flag. Only \navailable\
\ when running falco with a single \ninput.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--summary_filename"
alternatives:
- "-S"
description: "[Falco only] Specify filename for the short \nsummary output (TXT).\
\ If not specified, it \nwill be called summary.txt in either \nthe input\
\ file's directory or the one \nspecified in the --outdir flag. Only \navailable\
\ when running falco with a single \ninput.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "A C++ drop-in replacement of FastQC to assess the quality of sequence\
\ read data"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "qc"
- "fastqc"
- "sequencing"
license: "GPL-3.0"
references:
doi:
- "10.12688/f1000research.21142.2"
links:
repository: "https://github.com/smithlabcode/falco"
documentation: "https://falco.readthedocs.io/en/latest/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:trixie-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "wget"
- "build-essential"
- "g++"
- "zlib1g-dev"
- "procps"
interactive: false
- type: "docker"
run:
- "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz\
\ -O /tmp/falco.tar.gz && \\\ncd /tmp && \\\ntar xvf falco.tar.gz && \\\ncd\
\ falco-1.2.2 && \\\n./configure && \\\nmake all && \\\nmake install\n"
- type: "docker"
run:
- "echo \"falco: \\\"$(falco -v | sed -n 's/^falco //p')\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/falco/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/falco"
executable: "target/executable/falco/falco"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1563
target/executable/falco/falco Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3455
target/executable/fastp/fastp Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,370 @@
name: "fastqc"
version: "bump_viash_0_9_4"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "FASTQ file(s) to be analyzed.\n"
info: null
example:
- "input.fq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
description: "At least one of the output options (--html, --zip, --summary, --data)\
\ must be used.\n"
arguments:
- type: "file"
name: "--html"
description: "Create the HTML report of the results. \n'*' wild card must be provided\
\ in the output file name. \nWild card will be replaced by the input file basename.\n\
e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\
\ html file named sample_1.html\n"
info: null
example:
- "*.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--zip"
description: "Create the zip file(s) containing: html report, data, images, icons,\
\ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
\ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\
\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
info: null
example:
- "*.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Create the summary file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\
\ an output summary.txt file named sample_1_summary.txt\n"
info: null
example:
- "*_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--data"
description: "Create the data file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an\
\ output data.txt file named sample_1_data.txt\n"
info: null
example:
- "*_data.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--casava"
description: "Files come from raw casava output. Files in the same sample\ngroup\
\ (differing only by the group number) will be analysed\nas a set rather than\
\ individually. Sequences with the filter\nflag set in the header will be excluded\
\ from the analysis.\nFiles must have the same names given to them by casava\n\
(including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
\ together correctly.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nano"
description: "Files come from nanopore sequences and are in fast5 format. In\n\
this mode you can pass in directories to process and the program\nwill take\
\ in all fast5 files within those directories and produce\na single output file\
\ from the sequences found in all files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nofilter"
description: "If running with --casava then don't remove reads flagged by\ncasava\
\ as poor quality when performing the QC analysis.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in the read. \nWARNING: Using this option will cause fastqc\
\ to crash \nand burn if you use it on really long reads, and your \nplots may\
\ end up a ridiculous size. You have been warned!\n"
info: null
direction: "input"
- type: "integer"
name: "--min_length"
description: "Sets an artificial lower limit on the length of the \nsequence to\
\ be shown in the report. As long as you \nset this to a value greater or equal\
\ to your longest \nread length then this will be the sequence length used \n\
to create your read groups. This can be useful for making\ndirectly comparable\
\ statistics from datasets with somewhat \nvariable read lengths.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format detection and \nforces\
\ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
\ sam_mapped, and fastq.\n"
info: null
example:
- "bam"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--contaminants"
alternatives:
- "-c"
description: "Specifies a non-default file which contains the list \nof contaminants\
\ to screen overrepresented sequences against. \nThe file must contain sets\
\ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
\ a hash will be ignored.\n"
info: null
example:
- "contaminants.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--adapters"
alternatives:
- "-a"
description: "Specifies a non-default file which contains the list of \nadapter\
\ sequences which will be explicitly searched against \nthe library. The file\
\ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
\ prefixed with a hash will be ignored.\n"
info: null
example:
- "adapters.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--limits"
alternatives:
- "-l"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to determine \nthe warn/error limits for the various modules.\
\ \nThis file can also be used to selectively remove \nsome modules from the\
\ output altogether. The format \nneeds to mirror the default limits.txt file\
\ found in \nthe Configuration folder.\n"
info: null
example:
- "limits.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--kmers"
alternatives:
- "-k"
description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
\ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
\ specified.\n"
info: null
example:
- 7
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress all progress messages on stdout and only report errors.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "FastQC - A high throughput sequence QC analysis tool."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Quality control"
- "BAM"
- "SAM"
- "FASTQ"
license: "GPL-3.0, Apache-2.0"
links:
repository: "https://github.com/s-andrews/FastQC"
homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
issue_tracker: "https://github.com/s-andrews/FastQC/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "biocontainers/fastqc:v0.11.9_cv8"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/fastqc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/fastqc"
executable: "target/executable/fastqc/fastqc"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1706
target/executable/fastqc/fastqc Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,675 @@
name: "featurecounts"
version: "bump_viash_0_9_4"
authors:
- name: "Sai Nirmayi Yasa"
roles:
- "author"
- "maintainer"
info:
links:
email: "nirmayi@data-intuitive.com"
github: "sainirmayi"
linkedin: "sai-nirmayi-yasa"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Junior Bioinformatics Researcher"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--annotation"
alternatives:
- "-a"
description: "Name of an annotation file. GTF/GFF format by default. See '--format'\
\ option for more format information.\n"
info: null
example:
- "annotation.gtf"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "A list of SAM or BAM format files separated by semi-colon (;). They\
\ can be either name or location sorted. Location-sorted paired-end reads are\
\ automatically sorted by read names.\n"
info: null
example:
- "input_file1.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--counts"
alternatives:
- "-o"
description: "Name of output file including read counts in tab delimited format.\n"
info: null
example:
- "features.tsv"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Summary statistics of counting results in tab delimited format.\n"
info: null
example:
- "summary.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--junctions"
description: "Count number of reads supporting each exon-exon junction. Junctions\
\ were identified from those exon-spanning reads in the input (containing 'N'\
\ in CIGAR string).\n"
info: null
example:
- "junctions.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Annotation"
arguments:
- type: "string"
name: "--format"
alternatives:
- "-F"
description: "Specify format of the provided annotation file. Acceptable formats\
\ include 'GTF' (or compatible GFF format) and 'SAF'. 'GTF' by default. \n"
info: null
example:
- "GTF"
required: false
choices:
- "GTF"
- "GFF"
- "SAF"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--feature_type"
alternatives:
- "-t"
description: "Specify feature type(s) in a GTF annotation. If multiple types are\
\ provided, they should be separated by ';' with no space in between. 'exon'\
\ by default. Rows in the annotation with a matched feature will be extracted\
\ and used for read mapping.\n"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--attribute_type"
alternatives:
- "-g"
description: "Specify attribute type in GTF annotation. 'gene_id' by default.\
\ Meta-features used for read counting will be extracted from annotation using\
\ the provided value.\n"
info: null
example:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_attributes"
description: "Extract extra attribute types from the provided GTF annotation and\
\ include them in the counting output. These attribute types will not be used\
\ to group features. If more than one attribute type is provided they should\
\ be separated by semicolon (;).\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--chrom_alias"
alternatives:
- "-A"
description: "Provide a chromosome name alias file to match chr names in annotation\
\ with those in the reads. This should be a two-column comma-delimited text\
\ file. Its first column should include chr names in the annotation and its\
\ second column should include chr names in the reads. Chr names are case sensitive.\
\ No column header should be included in the file.\n"
info: null
example:
- "chrom_alias.csv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Level of summarization"
arguments:
- type: "boolean_true"
name: "--feature_level"
alternatives:
- "-f"
description: "Perform read counting at feature level (eg. counting reads for exons\
\ rather than genes).\n"
info: null
direction: "input"
- name: "Overlap between reads and features"
arguments:
- type: "boolean_true"
name: "--overlapping"
alternatives:
- "-O"
description: "Assign reads to all their overlapping meta-features (or features\
\ if '--feature_level' is specified).\n"
info: null
direction: "input"
- type: "integer"
name: "--min_overlap"
description: "Minimum number of overlapping bases in a read that is required for\
\ read assignment. 1 by default. Number of overlapping bases is counted from\
\ both reads if paired end. If a negative value is provided, then a gap of up\
\ to specified size will be allowed between read and the feature that the read\
\ is assigned to.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--frac_overlap"
description: "Minimum fraction of overlapping bases in a read that is required\
\ for read assignment. Value should be within range [0,1]. 0 by default. Number\
\ of overlapping bases is counted from both reads if paired end. Both this option\
\ and '--min_overlap' option need to be satisfied for read assignment.\n"
info: null
example:
- 0.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--frac_overlap_feature"
description: "Minimum fraction of overlapping bases in a feature that is required\
\ for read assignment. Value should be within range [0,1]. 0 by default.\n"
info: null
example:
- 0.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--largest_overlap"
description: "Assign reads to a meta-feature/feature that has the largest number\
\ of overlapping bases.\n"
info: null
direction: "input"
- type: "integer"
name: "--non_overlap"
description: "Maximum number of non-overlapping bases in a read (or a read pair)\
\ that is allowed when being assigned to a feature. No limit is set by default.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--non_overlap_feature"
description: "Maximum number of non-overlapping bases in a feature that is allowed\
\ in read assignment. No limit is set by default.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_extension5"
description: "Reads are extended upstream by <int> bases from their 5' end.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_extension3"
description: "Reads are extended downstream by <int> bases from their 3' end.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read2pos"
description: "Reduce reads to their 5' most base or 3' most base. Read counting\
\ is then performed based on the single base the read is reduced to.\n"
info: null
required: false
choices:
- 3
- 5
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Multi-mapping reads"
arguments:
- type: "boolean_true"
name: "--multi_mapping"
alternatives:
- "-M"
description: "Multi-mapping reads will also be counted. For a multi-mapping read,\
\ all its reported alignments will be counted. The 'NH' tag in BAM/SAM input\
\ is used to detect multi-mapping reads.\n"
info: null
direction: "input"
- name: "Fractional counting"
arguments:
- type: "boolean_true"
name: "--fraction"
description: "Assign fractional counts to features. This option must be used together\
\ with '--multi_mapping' or '--overlapping' or both. When '--multi_mapping'\
\ is specified, each reported alignment from a multi-mapping read (identified\
\ via 'NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where\
\ x is the total number of alignments reported for the same read. When '--overlapping'\
\ is specified, each overlapping feature will receive a fractional count of\
\ 1/y, where y is the total number of features overlapping with the read. When\
\ both '--multi_mapping' and '--overlapping' are specified, each alignment will\
\ carry a fractional count of 1/(x*y).\n"
info: null
direction: "input"
- name: "Read filtering"
arguments:
- type: "integer"
name: "--min_map_quality"
alternatives:
- "-Q"
description: "The minimum mapping quality score a read must satisfy in order to\
\ be counted. For paired-end reads, at least one end should satisfy this criteria.\
\ 0 by default.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--split_only"
description: "Count split alignments only (ie. alignments with CIGAR string containing\
\ 'N'). An example of split alignments is exon-spanning reads in RNA-seq data.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--non_split_only"
description: "If specified, only non-split alignments (CIGAR strings do not contain\
\ letter 'N') will be counted. All the other alignments will be ignored.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--primary"
description: "Count primary alignments only. Primary alignments are identified\
\ using bit 0x100 in SAM/BAM FLAG field.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_dup"
description: "Ignore duplicate reads in read counting. Duplicate reads are identified\
\ using bit 0x400 in BAM/SAM FLAG field. The whole read pair is ignored if one\
\ of the reads is a duplicate read for paired end data.\n"
info: null
direction: "input"
- name: "Strandedness"
arguments:
- type: "integer"
name: "--strand"
alternatives:
- "-s"
description: "Perform strand-specific read counting. A single integer value (applied\
\ to all input files) should be provided. Possible values include: 0 (unstranded),\
\ 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded\
\ read counting carried out for all input files).\n"
info: null
example:
- 0
required: false
choices:
- 0
- 1
- 2
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Exon-exon junctions"
arguments:
- type: "file"
name: "--ref_fasta"
alternatives:
- "-G"
description: "Provide the name of a FASTA-format file that contains the reference\
\ sequences used in read mapping that produced the provided SAM/BAM files.\n"
info: null
example:
- "reference.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Parameters specific to paired end reads"
arguments:
- type: "boolean_true"
name: "--paired"
alternatives:
- "-p"
description: "Specify that input data contain paired-end reads. To perform fragment\
\ counting (ie. counting read pairs), the '--count_read_pairs' parameter should\
\ also be specified in addition to this parameter.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--count_read_pairs"
description: "Count read pairs (fragments) instead of reads. This option is only\
\ applicable for paired-end reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--both_aligned"
alternatives:
- "-B"
description: "Only count read pairs that have both ends aligned. This option is\
\ only applicable for paired-end reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--check_pe_dist"
alternatives:
- "-P"
description: "Check validity of paired-end distance when counting read pairs.\
\ Use '--min_length' and '--max_length' to set thresholds.\n"
info: null
direction: "input"
- type: "integer"
name: "--min_length"
alternatives:
- "-d"
description: "Minimum fragment/template length, 50 by default.\n"
info: null
example:
- 50
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_length"
alternatives:
- "-D"
description: "Maximum fragment/template length, 600 by default.\n"
info: null
example:
- 600
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--same_strand"
alternatives:
- "-C"
description: "Do not count read pairs that have their two ends mapping to different\
\ chromosomes or mapping to same chromosome but on different strands.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--donotsort"
description: "Do not sort reads in BAM/SAM input. Note that reads from the same\
\ pair are required to be located next to each other in the input.\n"
info: null
direction: "input"
- name: "Read groups"
arguments:
- type: "boolean_true"
name: "--by_read_group"
description: "Assign reads by read group. \"RG\" tag is required to be present\
\ in the input BAM/SAM files.\n"
info: null
direction: "input"
- name: "Long reads"
arguments:
- type: "boolean_true"
name: "--long_reads"
description: "Count long reads such as Nanopore and PacBio reads. Long read counting\
\ can only run in one thread and only reads (not read-pairs) can be counted.\
\ There is no limitation on the number of 'M' operations allowed in a CIGAR\
\ string in long read counting.\n"
info: null
direction: "input"
- name: "Assignment results for each read"
arguments:
- type: "file"
name: "--detailed_results"
description: "Directory to save the detailed assignment results. Use `--detailed_results_format`\
\ to determine the format of the detailed results.\n"
info: null
example:
- "detailed_results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--detailed_results_format"
alternatives:
- "-R"
description: "Output detailed assignment results for each read or read-pair. Results\
\ are saved to a file that is in one of the following formats: CORE, SAM and\
\ BAM. See documentation for more info about these formats.\n"
info: null
required: false
choices:
- "CORE"
- "SAM"
- "BAM"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Miscellaneous"
arguments:
- type: "integer"
name: "--max_M_op"
description: "Maximum number of 'M' operations allowed in a CIGAR string. 10 by\
\ default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are\
\ merged in the CIGAR string.\n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
description: "Output verbose information for debugging, such as un-matched chromosome/contig\
\ names.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "featureCounts is a read summarization program for counting reads generated\
\ from either RNA or genomic DNA sequencing experiments by implementing highly efficient\
\ chromosome hashing and feature blocking techniques. It works with either single\
\ or paired-end reads and provides a wide range of options appropriate for different\
\ sequencing applications.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "Read counting"
- "Genomic features"
license: "GPL-3.0"
references:
doi:
- "10.1093/bioinformatics/btt656"
links:
repository: "https://github.com/ShiLab-Bioinformatics/subread"
homepage: "https://subread.sourceforge.net/"
documentation: "https://subread.sourceforge.net/SubreadUsersGuide.pdf"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/subread:2.0.6--he4a0461_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "featureCounts -v 2>&1 | sed 's/featureCounts v\\([0-9.]*\\)/featureCounts:\
\ \\1/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/featurecounts/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/featurecounts"
executable: "target/executable/featurecounts/featurecounts"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,220 @@
name: "fq_subsample"
version: "bump_viash_0_9_4"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input_1"
description: "First input fastq file to subsample. Accepts both raw and gzipped\
\ FASTQ inputs."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_2"
description: "Second input fastq files to subsample. Accepts both raw and gzipped\
\ FASTQ inputs."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_1"
description: "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_2"
description: "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
# Sampling mode 1 of 2: keep each record with the given probability.
# Mutually exclusive with --record_count (see description).
- type: "double"
  name: "--probability"
  description: "The probability a record is kept, as a fraction in the range (0.0,\
    \ 1.0). Cannot be used with `--record_count`."
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
# Sampling mode 2 of 2: keep an exact number of records.
# Mutually exclusive with --probability (see description).
- type: "integer"
  name: "--record_count"
  description: "The exact number of records to keep. Cannot be used with `--probability`."
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Seed to use for the random number generator"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "fq subsample outputs a subset of records from single or paired FASTQ\
\ files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "fastq"
- "subsample"
- "subset"
license: "MIT"
links:
repository: "https://github.com/stjude-rust-labs/fq"
homepage: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
documentation: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "rust:1.81-slim"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt-get update && apt-get install -y git procps && \\\ngit clone --depth 1\
\ --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\ncd fq &&\
\ \\\ncargo install --locked --path . && \\\nmv target/release/fq /usr/local/bin/\
\ && \\\ncd / && rm -rf /fq\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/fq_subsample/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/fq_subsample"
executable: "target/executable/fq_subsample/fq_subsample"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,715 @@
name: "gffread"
version: "bump_viash_0_9_4"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "A reference file in either the GFF3, GFF2 or GTF format.\n"
info: null
example:
- "annotation.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chr_mapping"
alternatives:
- "-m"
description: "<chr_replace> is a name mapping table for converting reference sequence\
\ names, \nhaving this 2-column format: <original_ref_ID> <new_ref_ID>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--seq_info"
alternatives:
- "-s"
description: "<seq_info.fsize> is a tab-delimited file providing this info for\
\ each of the mapped \nsequences: <seq-name> <seq-length> <seq-description>\
\ (useful for --description option with \nmRNA/EST/protein mappings).\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Full path to a multi-fasta file with the genomic sequences for all\
\ input mappings, \nOR a directory with single-fasta files (one per genomic\
\ sequence, with file names \nmatching sequence names).\n"
info: null
example:
- "genome.fa"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--outfile"
alternatives:
- "-o"
description: "Write the output records into <outfile>.\n"
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--force_exons"
description: "Make sure that the lowest level GFF features are considered \"exon\"\
\ features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--gene2exon"
description: "For single-line genes not parenting any transcripts, add an exon\
\ feature spanning \nthe entire gene (treat it as a transcript).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--t_adopt"
description: "Try to find a parent gene overlapping/containing a transcript that\
\ does not have \nany explicit gene Parent.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--decode"
alternatives:
- "-D"
description: "Decode url encoded characters within attributes.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--merge_exons"
alternatives:
- "-Z"
description: "Merge very close exons into a single exon (when intron size<4).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--junctions"
alternatives:
- "-j"
description: "Output the junctions and the corresponding transcripts.\n"
info: null
direction: "input"
- type: "file"
name: "--spliced_exons"
alternatives:
- "-w"
description: "Write a fasta file with spliced exons for each transcript.\n"
info: null
example:
- "exons.fa"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--w_add"
description: "For the --spliced_exons option, extract additional <N> bases both\
\ upstream and \ndownstream of the transcript boundaries.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--w_nocds"
description: "For --spliced_exons, disable the output of CDS info in the FASTA\
\ file.\n"
info: null
direction: "input"
# Optional FASTA that gffread writes (-x), so it is declared as an output,
# consistent with the sibling --spliced_exons argument. must_exist stays
# false because the file is only produced when this option is requested.
- type: "file"
  name: "--spliced_cds"
  alternatives:
  - "-x"
  description: "Write a fasta file with spliced CDS for each GFF transcript.\n"
  info: null
  example:
  - "cds.fa"
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
# Optional protein FASTA that gffread writes (-y), so it is declared as an
# output, consistent with the sibling --spliced_exons argument. must_exist
# stays false because the file is only produced when this option is requested.
- type: "file"
  name: "--tr_cds"
  alternatives:
  - "-y"
  description: "Write a protein fasta file with the translation of CDS for each\
    \ record.\n"
  info: null
  example:
  - "tr_cds.fa"
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
# Modifier flag for the FASTA outputs; only meaningful together with
# --spliced_exons, --spliced_cds or --tr_cds.
- type: "boolean_true"
  name: "--w_coords"
  alternatives:
  - "-W"
  description: "For --spliced_exons, --spliced_cds and --tr_cds options, write in\
    \ the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--stop_dot"
alternatives:
- "-S"
description: "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--id_version"
alternatives:
- "-L"
description: "Ensembl GTF to GFF3 conversion, adds version to IDs.\n"
info: null
direction: "input"
- type: "string"
name: "--trackname"
alternatives:
- "-t"
description: "Use <trackname> in the 2nd column of each GFF/GTF output line.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--gtf_output"
alternatives:
- "-T"
description: "Main output will be GTF instead of GFF3.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed"
description: "Output records in BED format instead of default GFF3.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--tlf"
description: "Output \"transcript line format\" which is like GFF but with exons\
\ and CDS related \nfeatures stored as GFF attributes in the transcript feature\
\ line, like this:\n exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>\n\
<exons> is a comma-delimited list of exon_start-exon_end coordinates;\n<CDScoords>\
\ is CDS_start:CDS_end coordinates or a list like <exons>.\n"
info: null
direction: "input"
- type: "string"
name: "--table"
description: "Output a simple tab delimited format instead of GFF, with columns\
\ having the values \nof GFF attributes given in <attrlist>; special pseudo-attributes\
\ (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand,\
\ @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds\
\ FASTA output files are enabled, the \nsame fields (excluding @id) are appended\
\ to the definition line of corresponding FASTA\nrecords.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--expose_dups"
alternatives:
- "-E"
- "-v"
description: "Expose (warn about) duplicate transcript IDs and other potential\
\ problems with the \ngiven GFF/GTF records.\n"
info: null
direction: "input"
- name: "Options"
arguments:
- type: "file"
name: "--ids"
description: "Discard records/transcripts if their IDs are not listed in <IDs.lst>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nids"
description: "Discard records/transcripts if their IDs are listed in <IDs.lst>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--maxintron"
alternatives:
- "-i"
description: "Discard transcripts having an intron larger than <maxintron>.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--minlen"
alternatives:
- "-l"
description: "Discard transcripts shorter than <minlen> bases.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--range"
alternatives:
- "-r"
description: "Only show transcripts overlapping coordinate range <start>..<end>\
\ (on chromosome/contig \n<chr>, strand <strand> if provided).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--strict_range"
alternatives:
- "-R"
description: "For --range option, discard all transcripts that are not fully contained\
\ within the given \nrange.\n"
info: null
direction: "input"
- type: "string"
name: "--jmatch"
description: "Only output transcripts matching the given junction.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_single_exon"
alternatives:
- "-U"
description: "Discard single-exon transcripts.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--coding"
alternatives:
- "-C"
description: "Coding only: discard mRNAs that have no CDS features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nc"
description: "Non-coding only: discard mRNAs that have CDS features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_locus"
description: "Discard locus features and attributes found in the input.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--description"
alternatives:
- "-A"
description: "Use the description field from <seq_info.fsize> and add it as the\
\ value for a 'descr' \nattribute to the GFF record.\n"
info: null
direction: "input"
- name: "Sorting"
arguments:
- type: "boolean_true"
name: "--sort_alpha"
description: "Chromosomes (reference sequences) are sorted alphabetically.\n"
info: null
direction: "input"
- type: "file"
name: "--sort_by"
description: "Sort the reference sequences by the order in which their names are\
\ given in the \n<refseq.lst> file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Misc options"
arguments:
- type: "boolean_true"
name: "--keep_attrs"
alternatives:
- "-F"
description: "Keep all GFF attributes (for non-exon features).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_exon_attrs"
description: "For -F option, do not attempt to reduce redundant exon/CDS attributes.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_exon_attrs"
alternatives:
- "-G"
description: "Do not keep exon attributes, move them to the transcript feature\
\ (for GFF3 output).\n"
info: null
direction: "input"
# Attribute whitelist: a single string holding a comma-delimited list
# (multiple is false, so the value is passed through as one string).
- type: "string"
  name: "--attrs"
  description: "Only output the GTF/GFF attributes listed in <attr-list> which is\
    \ a comma delimited \nlist of attribute names to keep.\n"
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--keep_genes"
description: "In transcript-only mode (default), also preserve gene records.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_comments"
description: "For GFF3 input/output, try to preserve comments.\n"
info: null
direction: "input"
# Opt-in flag: include non-transcript GFF records in processing.
- type: "boolean_true"
  name: "--process_other"
  alternatives:
  - "-O"
  description: "Process other non-transcript GFF records (by default non-transcript\
    \ records are ignored).\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--rm_stop_codons"
alternatives:
- "-V"
description: "Discard any mRNAs with CDS having in-frame stop codons (requires\
\ --genome).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--adj_cds_start"
alternatives:
- "-H"
description: "For --rm_stop_codons option, check and adjust the starting CDS phase\
\ if the original phase\nleads to a translation with an in-frame stop codon.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--opposite_strand"
alternatives:
- "-B"
description: "For -V option, single-exon transcripts are also checked on the opposite\
\ strand (requires \n--genome). \n"
info: null
direction: "input"
- type: "boolean_true"
name: "--coding_status"
alternatives:
- "-P"
description: "Add transcript level GFF attributes about the coding status of each\
\ transcript, including \npartialness or in-frame stop codons (requires --genome).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--add_hasCDS"
description: "Add a \"hasCDS\" attribute with value \"true\" for transcripts that\
\ have CDS features. \n"
info: null
direction: "input"
- type: "boolean_true"
name: "--adj_stop"
description: "Stop codon adjustment: enables --coding_status and performs automatic\
\ adjustment of the CDS stop \ncoordinate if premature or downstream.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--rm_noncanon"
alternatives:
- "-N"
description: "Discard multi-exon mRNAs that have any intron with a non-canonical\
\ splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--complete_cds"
alternatives:
- "-J"
description: "Discard any mRNAs that either lack initial START codon or the terminal\
\ STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a\
\ complete CDS).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_pseudo"
description: "Filter out records matching the 'pseudo' keyword.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--in_bed"
description: "Input should be parsed as BED format (automatic if the input filename\
\ ends with .bed*).\n"
info: null
direction: "input"
# Input-format switch, counterpart of the --tlf output option defined in the
# "Outputs" argument group of this component.
- type: "boolean_true"
  name: "--in_tlf"
  description: "Input GFF-like one-line-per-transcript format without exon/CDS features\
    \ (see the --tlf \noption); automatic if the input filename ends with .tlf.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--stream"
description: "Fast processing of input GFF/BED transcripts as they are received\
\ (no sorting, exons must \nbe grouped by transcript in the input data).\n"
info: null
direction: "input"
- name: "Clustering"
arguments:
- type: "boolean_true"
name: "--merge"
alternatives:
- "-M"
description: "Cluster the input transcripts into loci, discarding \"redundant\"\
\ transcripts (those with \nthe same exact introns and fully contained or equal\
\ boundaries).\n"
info: null
direction: "input"
# File that gffread writes when clustering (-d), so it is declared as an
# output. As an input with must_exist: true, a not-yet-existing path would be
# rejected before gffread could create it. must_exist is false, as for
# --spliced_exons, because the file is only produced on request.
- type: "file"
  name: "--dupinfo"
  alternatives:
  - "-d"
  description: "For --merge option, write duplication info to file <dupinfo>.\n"
  info: null
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--cluster_only"
description: "Same as --merge but without discarding any of the \"duplicate\"\
\ transcripts, only create \n\"locus\" features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--rm_redundant"
alternatives:
- "-K"
description: "For --merge option: also discard as redundant the shorter, fully\
\ contained transcripts (intron \nchains matching a part of the container).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_boundary"
alternatives:
- "-Q"
description: "For --merge option, no longer require boundary containment when\
\ assessing redundancy (can be \ncombined with --rm_redundant); only introns\
\ have to match for multi-exon transcripts, and >=80%\noverlap for single-exon\
\ transcripts.\n"
info: null
direction: "input"
# Clustering modifier for --merge; the description refers to the sibling
# --no_boundary and --rm_redundant flags of this component.
- type: "boolean_true"
  name: "--no_overlap"
  alternatives:
  - "-Y"
  description: "For --merge option, enforce --no_boundary but also discard overlapping\
    \ single-exon transcripts,\neven on the opposite strand (can be combined with\
    \ --rm_redundant).\n"
  info: null
  direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Validate, filter, convert and perform various other operations on GFF\
\ files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "gff"
- "conversion"
- "validation"
- "filtering"
license: "MIT"
references:
doi:
- "10.12688/f1000research.23297.2"
links:
repository: "https://github.com/gpertea/gffread"
homepage: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread"
documentation: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo \"gffread: \\\"$(gffread --version 2>&1)\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/gffread/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/gffread"
executable: "target/executable/gffread/gffread"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

2771
target/executable/gffread/gffread Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,248 @@
name: "kallisto_index"
namespace: "kallisto"
version: "bump_viash_0_9_4"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "Path to a FASTA-file containing the transcriptome sequences, either\
\ in plain text or \ncompressed (.gz) format.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--d_list"
description: "Path to a FASTA-file containing sequences to mask from quantification.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
# The component's only output: where the built index ends up.
# A description is added so the generated help text documents it.
- type: "file"
  name: "--index"
  description: "Path where the built Kallisto index is written.\n"
  info: null
  example:
  - "Kallisto_index"
  must_exist: true
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--kmer_size"
description: "Kmer length passed to indexing step of pseudoaligners (default:\
\ '31').\n"
info: null
example:
- 31
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--make_unique"
description: "Replace repeated target names with unique names.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--aa"
description: "Generate index from a FASTA-file containing amino acid sequences.\n"
info: null
direction: "input"
# NOTE: the primary name keeps its historical misspelling so existing callers
# keep working; the correctly spelled flag is accepted as an alias.
# TODO(review): rename to --distinguish in the next breaking release.
- type: "boolean_true"
  name: "--distiguish"
  alternatives:
  - "--distinguish"
  description: "Generate index where sequences are distinguished by the sequence\
    \ names.\n"
  info: null
  direction: "input"
- type: "integer"
name: "--min_size"
alternatives:
- "-m"
description: "Length of minimizers (default: automatically chosen).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--ec_max_size"
alternatives:
- "-e"
description: "Maximum number of targets in an equivalence class (default: no maximum).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tmp"
alternatives:
- "-T"
description: "Path to a directory for temporary files.\n"
info: null
example:
- "tmp"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Build a Kallisto index for the transcriptome to use Kallisto in the\
\ mapping-based mode.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
keywords:
- "kallisto"
- "index"
license: "BSD 2-Clause License"
# Bare DOI identifier, not a resolver URL.
references:
  doi:
  - "10.1038/nbt.3519"
links:
repository: "https://github.com/pachterlab/kallisto"
homepage: "https://pachterlab.github.io/kallisto/about"
documentation: "https://pachterlab.github.io/kallisto/manual"
issue_tracker: "https://github.com/pachterlab/kallisto/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
# Image build steps: install the prebuilt kallisto v0.50.1 Linux binary.
setup:
- type: "docker"
  run:
  # ca-certificates is installed so wget can verify the TLS certificate of the
  # download host; the release binary is therefore fetched without
  # --no-check-certificate. The tarball, the extracted directory and the apt
  # lists are removed afterwards to keep the layer small.
  - |
    apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates wget && \
    wget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
    tar -xzf kallisto_linux-v0.50.1.tar.gz && \
    mv kallisto/kallisto /usr/local/bin/ && \
    rm -rf kallisto kallisto_linux-v0.50.1.tar.gz /var/lib/apt/lists/*
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/kallisto/kallisto_index/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/kallisto/kallisto_index"
executable: "target/executable/kallisto/kallisto_index/kallisto_index"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,276 @@
# Viash component configuration for kallisto_quant (namespace: kallisto).
name: "kallisto_quant"
namespace: "kallisto"
version: "bump_viash_0_9_4"
# Command-line arguments, grouped as Input / Output / Options.
argument_groups:
- name: "Input"
  arguments:
  - type: "file"
    name: "--input"
    description: "List of input FastQ files of size 1 and 2 for single-end and paired-end\
      \ data, respectively."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--index"
    alternatives:
    - "-i"
    description: "Kallisto genome index."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output"
  arguments:
  - type: "file"
    name: "--output_dir"
    alternatives:
    - "-o"
    description: "Directory to write output to."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--log"
    description: "File containing log information from running kallisto quant"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "Options"
  arguments:
  - type: "boolean_true"
    name: "--single"
    description: "Single end mode."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--single_overhang"
    description: "Include reads where unobserved rest of fragment is predicted to\
      \ lie outside a transcript."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--fr_stranded"
    description: "Strand specific reads, first read forward."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--rf_stranded"
    description: "Strand specific reads, first read reverse."
    info: null
    direction: "input"
  - type: "double"
    name: "--fragment_length"
    alternatives:
    - "-l"
    description: "The estimated average fragment length."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--sd"
    alternatives:
    - "-s"
    # The stray trailing space before the embedded newline was removed.
    description: "The estimated standard deviation of the fragment length (default:\
      \ -l, -s values are estimated\nfrom paired end data, but are required when\
      \ using --single).\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--plaintext"
    description: "Output plaintext instead of HDF5."
    info: null
    direction: "input"
  - type: "integer"
    name: "--bootstrap_samples"
    alternatives:
    - "-b"
    description: "Number of bootstrap samples to draw. Default: '0'\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed"
    description: "Random seed for bootstrap. Default: '42'\n"
    info: null
    example:
    - 42
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Script executed by the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Quantifying abundances of transcripts from RNA-Seq data, or more generally\
  \ of target sequences using high-throughput sequencing reads.\n"
# Test script and the data directory it uses.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test_data"
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
requirements:
  commands:
  - "ps"
keywords:
- "kallisto"
- "quant"
- "pseudoalignment"
license: "BSD 2-Clause License"
references:
  doi:
  - "10.1038/nbt.3519"
links:
  repository: "https://github.com/pachterlab/kallisto"
  homepage: "https://pachterlab.github.io/kallisto/about"
  documentation: "https://pachterlab.github.io/kallisto/manual"
  issue_tracker: "https://github.com/pachterlab/kallisto/issues"
# Runners: a standalone executable and a Nextflow module. The Nextflow
# labels map label names to memory / cpu directive strings.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Execution engines: a Docker image based on ubuntu:22.04 with the kallisto
# v0.50.1 release binary placed in /usr/local/bin, plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "ubuntu:22.04"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  - type: "docker"
    run:
    # ca-certificates is installed so that wget can verify the TLS certificate
    # of the download host; the release archive is therefore fetched without
    # --no-check-certificate. Build leftovers are removed in the same step.
    - |
      apt-get update && \
      apt-get install -y --no-install-recommends ca-certificates wget && \
      wget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
      tar -xzf kallisto_linux-v0.50.1.tar.gz && \
      mv kallisto/kallisto /usr/local/bin/ && \
      rm -rf kallisto kallisto_linux-v0.50.1.tar.gz /var/lib/apt/lists/*
  - type: "docker"
    run:
    # Writes the installed kallisto version to /var/software_versions.txt.
    - "echo \"kallisto: $(kallisto version | sed 's/kallisto, version //')\" > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Build provenance: source config path, selected runner and engines, output
# locations, Viash version and the git state of the source repository.
build_info:
  config: "src/kallisto/kallisto_quant/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/kallisto/kallisto_quant"
  executable: "target/executable/kallisto/kallisto_quant/kallisto_quant"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the biobox package this component was built from.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,537 @@
# Viash component configuration for lofreq_call (namespace: lofreq).
name: "lofreq_call"
namespace: "lofreq"
version: "bump_viash_0_9_4"
authors:
- name: "Kai Waldrant"
  roles:
  - "author"
  - "maintainer"
  info:
    links:
      email: "kai@data-intuitive.com"
      github: "KaiWaldrant"
      orcid: "0009-0003-8555-1361"
      linkedin: "kaiwaldrant"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Bioinformatician"
    - name: "Open Problems"
      href: "https://openproblems.bio"
      role: "Contributor"
# Command-line arguments, grouped as Inputs / Outputs / Arguments.
argument_groups:
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    description: "Input BAM file.\n"
    info: null
    example:
    - "normal.bam"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--input_bai"
    description: "Index file for the input BAM file.\n"
    info: null
    example:
    - "normal.bai"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--ref"
    alternatives:
    - "-f"
    description: "Indexed reference fasta file (gzip supported). Default: none.\n"
    info: null
    example:
    - "reference.fasta"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--out"
    alternatives:
    - "-o"
    description: "Vcf output file. Default: stdout.\n"
    info: null
    example:
    - "output.vcf"
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "Arguments"
  arguments:
  - type: "string"
    name: "--region"
    alternatives:
    - "-r"
    description: "Limit calls to this region (chrom:start-end). Default: none.\n"
    info: null
    example:
    - "chr1:1000-2000"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bed"
    alternatives:
    - "-l"
    description: "List of positions (chr pos) or regions (BED). Default: none.\n"
    info: null
    example:
    - "regions.bed"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_bq"
    alternatives:
    - "-q"
    description: "Skip any base with baseQ smaller than INT. Default: 6.\n"
    info: null
    example:
    - 6
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_alt_bq"
    alternatives:
    - "-Q"
    description: "Skip alternate bases with baseQ smaller than INT. Default: 6.\n"
    info: null
    example:
    - 6
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--def_alt_bq"
    alternatives:
    - "-R"
    description: "Overwrite baseQs of alternate bases (that passed bq filter) with\
      \ this value (-1: use median ref-bq; 0: keep). Default: 0.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_jq"
    alternatives:
    - "-j"
    description: "Skip any base with joinedQ smaller than INT. Default: 0.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_alt_jq"
    alternatives:
    - "-J"
    description: "Skip alternate bases with joinedQ smaller than INT. Default: 0.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--def_alt_jq"
    alternatives:
    - "-K"
    description: "Overwrite joinedQs of alternate bases (that passed jq filter) with\
      \ this value (-1: use median ref-bq; 0: keep). Default: 0.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--no_baq"
    alternatives:
    - "-B"
    description: "Disable use of base-alignment quality (BAQ).\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--no_idaq"
    alternatives:
    - "-A"
    description: "Don't use IDAQ values (NOT recommended under ANY circumstances other\
      \ than debugging).\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--del_baq"
    alternatives:
    - "-D"
    description: "Delete pre-existing BAQ values, i.e. compute even if already present\
      \ in BAM.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--no_ext_baq"
    alternatives:
    - "-e"
    description: "Use 'normal' BAQ (samtools default) instead of extended BAQ (both\
      \ computed on the fly if not already present in lb tag).\n"
    info: null
    direction: "input"
  - type: "integer"
    name: "--min_mq"
    alternatives:
    - "-m"
    description: "Skip reads with mapping quality smaller than INT. Default: 0.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--max_mq"
    alternatives:
    - "-M"
    description: "Cap mapping quality at INT. Default: 255.\n"
    info: null
    example:
    - 255
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--no_mq"
    alternatives:
    - "-N"
    description: "Don't merge mapping quality in LoFreq's model.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--call_indels"
    description: "Enable indel calls (note: preprocess your file to include indel\
      \ alignment qualities!).\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--only_indels"
    description: "Only call indels; no SNVs.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--src_qual"
    alternatives:
    - "-s"
    description: "Enable computation of source quality.\n"
    info: null
    direction: "input"
  # NOTE(review): the description mentions several comma-separated files, but
  # this argument has `multiple: false` and `must_exist: true`. Confirm
  # whether a comma-separated value is actually accepted; script.sh is not
  # visible from here.
  - type: "file"
    name: "--ign_vcf"
    alternatives:
    - "-S"
    description: "Ignore variants in this vcf file for source quality computation.\
      \ Multiple files can be given separated by commas.\n"
    info: null
    example:
    - "variants.vcf"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--def_nm_q"
    alternatives:
    - "-T"
    description: "If >= 0, then replace non-match base qualities with this default\
      \ value. Default: -1.\n"
    info: null
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--sig"
    alternatives:
    - "-a"
    description: "P-Value cutoff / significance level. Default: 0.010000.\n"
    info: null
    example:
    - 0.01
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--bonf"
    alternatives:
    - "-b"
    description: "Bonferroni factor. 'dynamic' (increase per actually performed test)\
      \ or INT. Default: Dynamic.\n"
    info: null
    example:
    - "dynamic"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_cov"
    alternatives:
    - "-C"
    # The note refers to this component's own flag, --no_default_filter
    # (declared further down), so the underscore spelling is used.
    description: "Test only positions having at least this coverage. Default: 1.\n\
      (note: without --no_default_filter default filters (incl. coverage) kick in\
      \ after predictions are done).\n"
    info: null
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--max_depth"
    alternatives:
    - "-d"
    description: "Cap coverage at this depth. Default: 1000000.\n"
    info: null
    example:
    - 1000000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--illumina_13"
    description: "Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--use_orphan"
    description: "Count anomalous read pairs (i.e. where mate is not aligned properly).\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--plp_summary_only"
    description: "No variant calling. Just output pileup summary per column.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--no_default_filter"
    description: "Don't run default 'lofreq filter' automatically after calling variants.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--force_overwrite"
    description: "Overwrite any existing output.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--verbose"
    description: "Be verbose.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--debug"
    description: "Enable debugging.\n"
    info: null
    direction: "input"
# Script executed by the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
description: "Call variants from a BAM file.\n\nLoFreq* (i.e. LoFreq version 2) is\
  \ a fast and sensitive variant-caller for inferring SNVs and indels from next-generation\
  \ sequencing data. It makes full use of base-call qualities and other sources of\
  \ errors inherent in sequencing (e.g. mapping or base/indel alignment uncertainty),\
  \ which are usually ignored by other methods or only used for filtering.\n\nLoFreq*\
  \ can run on almost any type of aligned sequencing data (e.g. Illumina, IonTorrent\
  \ or Pacbio) since no machine- or sequencing-technology dependent thresholds are\
  \ used. It automatically adapts to changes in coverage and sequencing quality and\
  \ can therefore be applied to a variety of data-sets e.g. viral/quasispecies, bacterial,\
  \ metagenomics or somatic data.\n\nLoFreq* is very sensitive; most notably, it is\
  \ able to predict variants below the average base-call quality (i.e. sequencing\
  \ error rate). Each variant call is assigned a p-value which allows for rigorous\
  \ false positive control. Even though it uses no approximations or heuristics, it\
  \ is very efficient due to several runtime optimizations and also provides a (pseudo-)parallel\
  \ implementation. LoFreq* is generic and fast enough to be applied to high-coverage\
  \ data and large genomes. On a single processor it takes a minute to analyze Dengue\
  \ genome sequencing data with nearly 4000X coverage, roughly one hour to call SNVs\
  \ on a 600X coverage E.coli genome and also roughly an hour to run on a 100X coverage\
  \ human exome dataset.\n"
# Test script and the data directory it uses.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test_data"
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
requirements:
  commands:
  - "ps"
keywords:
- "variant calling"
- "low frequency variant calling"
- "lofreq"
- "lofreq/call"
license: "MIT"
references:
  doi:
  - "10.1093/nar/gks918"
# `repository` points at the LoFreq source repository, matching the tool's
# homepage and documentation links below.
links:
  repository: "https://github.com/CSB5/lofreq"
  homepage: "https://csb5.github.io/lofreq/"
  documentation: "https://csb5.github.io/lofreq/commands/"
# Runners: a standalone executable and a Nextflow module. The Nextflow
# labels map label names to memory / cpu directive strings.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Execution engines: the lofreq 2.1.5 biocontainer image plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "quay.io/biocontainers/lofreq:2.1.5--py38h794fc9e_10"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  - type: "docker"
    run:
    # Writes the installed lofreq version to /var/software_versions.txt.
    - "version=$(lofreq version | grep 'version' | sed 's/version: //') && \\\necho\
      \ \"lofreq: $version\" > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Build provenance: source config path, selected runner and engines, output
# locations, Viash version and the git state of the source repository.
build_info:
  config: "src/lofreq/call/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/lofreq/lofreq_call"
  executable: "target/executable/lofreq/lofreq_call/lofreq_call"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the biobox package this component was built from.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,245 @@
# Viash component configuration for lofreq_indelqual (namespace: lofreq).
name: "lofreq_indelqual"
namespace: "lofreq"
version: "bump_viash_0_9_4"
authors:
- name: "Kai Waldrant"
  roles:
  - "author"
  - "maintainer"
  info:
    links:
      email: "kai@data-intuitive.com"
      github: "KaiWaldrant"
      orcid: "0009-0003-8555-1361"
      linkedin: "kaiwaldrant"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Bioinformatician"
    - name: "Open Problems"
      href: "https://openproblems.bio"
      role: "Contributor"
# Command-line arguments, grouped as Inputs / Outputs / Arguments.
argument_groups:
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    description: "Input BAM file.\n"
    info: null
    example:
    - "normal.bam"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--ref"
    alternatives:
    - "-f"
    description: "Reference sequence used for mapping (Only required for --dindel).\n"
    info: null
    example:
    - "reference.fasta"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--out"
    alternatives:
    - "-o"
    description: "Output BAM file.\n"
    info: null
    example:
    - "output.bam"
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "Arguments"
  arguments:
  # --uniform and --dindel are described as clashing with each other.
  - type: "string"
    name: "--uniform"
    alternatives:
    - "-u"
    description: "Add this indel quality uniformly to all bases. Use two comma separated\
      \ values to specify insertion and deletion quality separately. (clashes with\
      \ --dindel).\n"
    info: null
    example:
    - "50,50"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--dindel"
    description: "Add Dindel's indel qualities (Illumina specific) (clashes with -u;\
      \ needs --ref).\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--verbose"
    description: "Be verbose.\n"
    info: null
    direction: "input"
# Script executed by the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
# The sentence about GATK's BQSR now ends with a period before "If that's
# not possible".
description: "Insert indel qualities into BAM file (required for indel predictions).\n\
  \nThe preferred way of inserting indel qualities should be via GATK's BQSR (>=2).\
  \ If that's not possible, use this subcommand.\nThe command has two modes: 'uniform'\
  \ and 'dindel':\n- 'uniform' will assign a given value uniformly, whereas\n- 'dindel'\
  \ will insert indel qualities based on Dindel (PMID 20980555).\nBoth will overwrite\
  \ any existing values.\nDo not realign your BAM file afterwards!\n"
# Test script and the data directory it uses.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test_data"
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
requirements:
  commands:
  - "ps"
keywords:
- "bam"
- "indel"
- "qualities"
- "indelqual"
- "lofreq"
- "lofreq/indelqual"
license: "MIT"
references:
  doi:
  - "10.1093/nar/gks918"
# `repository` points at the LoFreq source repository, matching the tool's
# homepage and documentation links below.
links:
  repository: "https://github.com/CSB5/lofreq"
  homepage: "https://csb5.github.io/lofreq/"
  documentation: "https://csb5.github.io/lofreq/commands/"
# Runners: a standalone executable and a Nextflow module. The Nextflow
# labels map label names to memory / cpu directive strings.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Execution engines: the lofreq 2.1.5 biocontainer image plus a native engine.
engines:
- type: "docker"
  id: "docker"
  image: "quay.io/biocontainers/lofreq:2.1.5--py38h794fc9e_10"
  target_registry: "images.viash-hub.com"
  target_tag: "bump_viash_0_9_4"
  namespace_separator: "/"
  setup:
  - type: "docker"
    run:
    # Writes the installed lofreq version to /var/software_versions.txt.
    - "version=$(lofreq version | grep 'version' | sed 's/version: //') && \\\necho\
      \ \"lofreq: $version\" > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
# Build provenance: source config path, selected runner and engines, output
# locations, Viash version and the git state of the source repository.
build_info:
  config: "src/lofreq/indelqual/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/lofreq/lofreq_indelqual"
  executable: "target/executable/lofreq/lofreq_indelqual/lofreq_indelqual"
  viash_version: "0.9.4"
  git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-34-gbe1cd83"
# Settings of the biobox package this component was built from.
package_config:
  name: "biobox"
  version: "bump_viash_0_9_4"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,486 @@
name: "multiqc"
version: "bump_viash_0_9_4"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "File paths to be searched for analysis results to be included in\
\ the report.\n"
info: null
example:
- "data/results"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_report"
description: "Filepath of the generated report.\n"
info: null
example:
- "multiqc_report.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_data"
description: "Output directory for parsed data files. If not provided, parsed\
\ data will not be published.\n"
info: null
example:
- "multiqc_data"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_plots"
description: "Output directory for generated plots. If not provided, plots will\
\ not be published.\n"
info: null
example:
- "multiqc_plots"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Modules and analyses to run"
arguments:
- type: "string"
name: "--include_modules"
description: "Use only these modules"
info: null
example:
- "fastqc"
- "cutadapt"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--exclude_modules"
description: "Do not use these modules"
info: null
example:
- "fastqc"
- "cutadapt"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--ignore_analysis"
info: null
example:
- "run_one/*"
- "run_two/*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--ignore_samples"
info: null
example:
- "sample_1*"
- "sample_3*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--ignore_symlinks"
description: "Ignore symlinked directories and files"
info: null
direction: "input"
- name: "Sample name handling"
arguments:
- type: "boolean_true"
name: "--dirs"
description: "Prepend directory to sample names to avoid clashing filenames"
info: null
direction: "input"
- type: "integer"
name: "--dirs_depth"
description: "Prepend n directories to sample names. Negative number to take from\
\ start of path."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--full_names"
description: "Do not clean the sample names (leave as full file name)"
info: null
direction: "input"
- type: "boolean_true"
name: "--fn_as_s_name"
description: "Use the log filename as the sample name"
info: null
direction: "input"
- type: "file"
name: "--replace_names"
description: "TSV file to rename sample names during report generation"
info: null
example:
- "replace_names.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Report Customisation"
arguments:
- type: "string"
name: "--title"
description: "Report title. Printed as page header, used for filename if not otherwise\
\ specified.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--comment"
description: "Custom comment, will be printed at the top of the report.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--template"
description: "Report template to use.\n"
info: null
required: false
choices:
- "default"
- "gathered"
- "geo"
- "highcharts"
- "sections"
- "simple"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_names"
description: "TSV file containing alternative sample names for renaming buttons\
\ in the report.\n"
info: null
example:
- "sample_names.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_filters"
description: "TSV file containing show/hide patterns for the report\n"
info: null
example:
- "sample_filters.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--custom_css_file"
description: "Custom CSS file to add to the final report\n"
info: null
example:
- "custom_style_sheet.css"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--profile_runtime"
description: "Add analysis of how long MultiQC takes to run to the report\n"
info: null
direction: "input"
- name: "MultiQC behaviour"
arguments:
- type: "boolean_true"
name: "--verbose"
description: "Increase output verbosity.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
description: "Only show log warnings\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--strict"
description: "Don't catch exceptions, run additional code checks to help development.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--development"
description: "Development mode. Do not compress and minimise JS, export uncompressed\
\ plot data.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--require_logs"
description: "Require all explicitly requested modules to have log files. If not,\
\ MultiQC will exit with an error.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_megaqc_upload"
description: "Don't upload generated report to MegaQC, even if MegaQC options\
\ are found.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_ansi"
description: "Disable coloured log output.\n"
info: null
direction: "input"
- type: "string"
name: "--cl_config"
description: "YAML formatted string that allows customizing MultiQC behaviour\
\ like input file detection.\n"
info: null
example:
- "qualimap_config: { general_stats_coverage: [20,40,200] }"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output format"
arguments:
- type: "boolean_true"
name: "--flat"
description: "Use only flat plots (static images).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--interactive"
description: "Use only interactive plots (in-browser Javascript).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--data_dir"
description: "Force the parsed data directory to be created.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_data_dir"
description: "Prevent the parsed data directory from being created.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--zip_data_dir"
description: "Compress the data directory.\n"
info: null
direction: "input"
- type: "string"
name: "--data_format"
description: "Output parsed data in a different format than the default 'txt'.\n"
info: null
required: false
choices:
- "tsv"
- "csv"
- "json"
- "yaml"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--pdf"
description: "Creates PDF report with the 'simple' template. Requires Pandoc to\
\ be installed.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "MultiQC aggregates results from bioinformatics analyses across many\
\ samples into a single report.\nIt searches a given directory for analysis logs\
\ and compiles a HTML report. It's a general use tool, perfect for summarising the\
\ output from numerous bioinformatics tools.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info:
keywords:
- "QC"
- "html report"
- "aggregate analysis"
links:
homepage: "https://multiqc.info/"
documentation: "https://multiqc.info/docs/"
repository: "https://github.com/MultiQC/MultiQC"
references:
doi: "10.1093/bioinformatics/btw354"
licence: "GPL v3 or later"
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/biobox"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "bump_viash_0_9_4"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "multiqc --version | sed 's/multiqc, version\\s\\(.*\\)/multiqc: \"\\1\"/' >\
\ /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "jq"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/multiqc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/multiqc"
executable: "target/executable/multiqc/multiqc"
viash_version: "0.9.4"
git_commit: "be1cd83dd684d0a881a9c1f8e5b43d82a15d8b76"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-34-gbe1cd83"
package_config:
name: "biobox"
version: "bump_viash_0_9_4"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'bump_viash_0_9_4'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

2011
target/executable/multiqc/multiqc Executable file

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More