Build branch v0.2 with version v0.2 (f22ab0e)

Build pipeline: viash-hub.biobox.v0.2-s4bkm

Source commit: f22ab0eab5

Source message: Prep v0.2.0
This commit is contained in:
CI
2024-09-12 13:07:11 +00:00
commit 884fc9474e
691 changed files with 402692 additions and 0 deletions

View File

@@ -0,0 +1,261 @@
name: "agat_convert_bed2gff"
namespace: "agat"
version: "v0.2"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bed"
description: "Input bed file that will be converted."
info: null
example:
- "input.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gff"
description: "Output GFF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--source"
description: "The source informs about the tool used to produce the data and is\
\ stored in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc.\
\ [default: data]\n"
info: null
example:
- "Stringtie"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--primary_tag"
description: "The primary_tag corresponds to the data type and is stored in 3rd\
\ field of a gff file. Example: gene, mRNA, CDS, etc. [default: gene]\n"
info: null
example:
- "gene"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_false"
name: "--inflate_off"
description: "By default we inflate the block fields (blockCount, blockSizes,\
\ blockStarts) to create subfeatures of the main feature (primary_tag). The\
\ type of subfeature created is based on the inflate_type parameter. If you\
\ do not want this inflating behaviour you can deactivate it by using the --inflate_off\
\ option.\n"
info: null
direction: "input"
- type: "string"
name: "--inflate_type"
description: "Feature type (3rd column in gff) created when inflate parameter\
\ activated [default: exon].\n"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
description: "add verbosity"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script takes a bed file as input, and will translate it into GFF format.\
\ The BED format is described in the UCSC Genome Browser data file formats FAQ.\
\ The script converts 0-based, half-open [start-1,\
\ end) bed file to 1-based, closed [start, end] General Feature Format v3 (GFF3).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_bed2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_bed2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_bed2gff"
executable: "target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,251 @@
name: "agat_convert_embl2gff"
namespace: "agat"
version: "v0.2"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--embl"
description: "Input EMBL file that will be read."
info: null
example:
- "input.embl"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gff"
description: "Output GFF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "boolean_true"
name: "--emblmygff3"
description: "Means that the EMBL flat file comes from the EMBLmyGFF3 software.\
\ This is an EMBL format dedicated for submission and contains particularity\
\ to deal with. This parameter is needed to get a proper sequence id in the\
\ GFF3 from an embl made with EMBLmyGFF3.\n"
info: null
direction: "input"
- type: "string"
name: "--primary_tag"
alternatives:
- "--pt"
- "-t"
description: "List of \"primary tag\". Useful to discard or keep specific features.\
\ Multiple tags must be comma-separated.\n"
info: null
example:
- "tag1"
- "tag2"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--discard"
alternatives:
- "-d"
description: "Means that primary tags provided by the option \"primary_tag\" will\
\ be discarded.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep"
alternatives:
- "-k"
description: "Means that only primary tags provided by the option \"primary_tag\"\
\ will be kept.\n"
info: null
direction: "input"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script takes an EMBL file as input, and will translate it into GFF\
\ format.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_embl2gff.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_embl2gff/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_embl2gff"
executable: "target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,254 @@
name: "agat_convert_sp_gff2gtf"
namespace: "agat"
version: "v0.2"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-i"
description: "Input GFF/GTF file that will be read"
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
- "--gtf"
description: "Output GTF file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gtf"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--gtf_version"
description: "Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default\
\ value from AGAT config file (relax for the default config). The script option\
\ has the higher priority. \n\n * relax: all feature types are accepted. \
\ \n * GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine,\
\ start_codon, stop_codon, three_prime_utr and five_prime_utr. \n * GTF2.5\
\ (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon,\
\ stop_codon, Selenocysteine. \n * GTF2.2 (9 feature types accepted): CDS,\
\ start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon.\
\ \n * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon,\
\ 5UTR, 3UTR. \n * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon,\
\ exon. \n * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon,\
\ exon, intron. \n"
info: null
example:
- "3"
required: false
choices:
- "relax"
- "1"
- "2"
- "2.1"
- "2.2"
- "2.5"
- "3"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "Input agat config file. By default AGAT takes as input agat_config.yaml\
\ file from the working directory if any, otherwise it takes the original agat_config.yaml\
\ shipped with AGAT. To get the agat_config.yaml locally type: \"agat config\
\ --expose\". The --config option gives you the possibility to use your own\
\ AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script aims to convert any GTF/GFF file into a proper GTF file.\
\ Full\ninformation about the format can be found here:\nhttps://agat.readthedocs.io/en/latest/gxf.html\
\ You can choose among 7\ndifferent GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax).\
\ Depending on the\nversion selected the script will filter out the features that are\
\ not\naccepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene\npseudogene)\
\ will be converted into gene feature and every level2 feature\n(e.g mRNA ncRNA)\
\ will be converted into transcript feature. Using the\n\"relax\" option you will\
\ produce a GTF-like output keeping all original\nfeature types (3rd column). No\
\ modification will occur e.g. mRNA to\ntranscript.\n\nTo be fully GTF compliant\
\ all features have a gene_id and a transcript_id\nattribute. The gene_id is a unique\
\ identifier for the genomic source of\nthe transcript, which is used to group transcripts\
\ into genes. The\ntranscript_id is a unique identifier for the predicted transcript,\
\ which\nis used to group features into transcripts.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GTF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gff2gtf"
executable: "target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,214 @@
name: "agat_convert_sp_gff2tsv"
namespace: "agat"
version: "v0.2"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gff"
alternatives:
- "-f"
description: "Input GTF/GFF file."
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
- "--out"
- "--outfile"
description: "Output TSV file. If no output file is specified, the output will\
\ be written to STDOUT."
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "String - Input agat config file. By default AGAT takes as input\n\
agat_config.yaml file from the working directory if any,\notherwise it takes\
\ the original agat_config.yaml shipped with\nAGAT. To get the agat_config.yaml\
\ locally type: \"agat config\n--expose\". The --config option gives you the\
\ possibility to use\nyour own AGAT config file (located elsewhere or named\n\
differently). \n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "The script aims to convert gtf/gff file into tabulated file. Attribute's\n\
tags from the 9th column become column titles.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gff2tsv.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gff2tsv"
executable: "target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,221 @@
name: "agat_convert_sp_gxf2gxf"
namespace: "agat"
version: "v0.2"
authors:
- name: "Leïla Paquay"
roles:
- "author"
- "maintainer"
info:
links:
email: "leila@data-intuitive.com"
github: "Leila011"
linkedin: "leilapaquay"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Software Developer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--gxf"
alternatives:
- "-g"
- "--gtf"
- "--gff"
description: "String - Input GTF/GFF file. Compressed file with .gz extension\
\ is accepted.\n"
info: null
example:
- "input.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "String - Output GFF file. If no output file is specified, the output\
\ will be written to STDOUT.\n"
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "file"
name: "--config"
alternatives:
- "-c"
description: "String - Input agat config file. By default AGAT takes as input\
\ agat_config.yaml file from the working directory if any, otherwise it takes\
\ the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml\
\ locally type: \"agat config --expose\". The --config option gives you the\
\ possibility to use your own AGAT config file (located elsewhere or named differently).\n"
info: null
example:
- "custom_agat_config.yaml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "This script fixes and/or standardizes any GTF/GFF file into full sorted\n\
GTF/GFF file. The AGAT parser removes duplicate features, fixes\nduplicated IDs,\
\ adds missing ID and/or Parent attributes, deflates\nfactorized attributes (attributes\
\ with several parents are duplicated\nwith uniq ID), add missing features when\
\ possible (e.g. add exon if only\nCDS described, add UTR if CDS and exon described),\
\ fix feature locations\n(e.g. check exon is embedded in the parent features mRNA,\
\ gene), etc...\n\nAll AGAT's scripts with the _sp_ prefix use the AGAT parser,\
\ before\nperforming any supplementary task. So, it is not necessary to run this\n\
script prior to the use of any other _sp_ script.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gene annotations"
- "GFF conversion"
license: "GPL-3.0"
references:
doi:
- "10.5281/zenodo.3552717"
links:
repository: "https://github.com/NBISweden/AGAT"
homepage: "https://github.com/NBISweden/AGAT"
documentation: "https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gxf2gxf.html"
issue_tracker: "https://github.com/NBISweden/AGAT/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "agat --version | sed 's/AGAT\\s\\(.*\\)/agat: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/agat/agat_convert_sp_gxf2gxf"
executable: "target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,732 @@
name: "arriba"
version: "v0.2"
authors:
- name: "Robrecht Cannoodt"
roles:
- "author"
- "maintainer"
info:
links:
email: "robrecht@data-intuitive.com"
github: "rcannood"
orcid: "0000-0003-3641-729X"
linkedin: "robrechtcannoodt"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Science Engineer"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Core Member"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bam"
alternatives:
- "-x"
description: "File in SAM/BAM/CRAM format with main alignments as generated by\
\ STAR\n(Aligned.out.sam). Arriba extracts candidate reads from this file.\n"
info: null
example:
- "Aligned.out.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-a"
description: "FastA file with genome sequence (assembly). The file may be gzip-compressed.\
\ An \nindex with the file extension .fai must exist only if CRAM files are\
\ processed.\n"
info: null
example:
- "assembly.fa"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gene_annotation"
alternatives:
- "-g"
description: "GTF file with gene annotation. The file may be gzip-compressed.\n"
info: null
example:
- "annotation.gtf"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--known_fusions"
alternatives:
- "-k"
description: "File containing known/recurrent fusions. Some cancer entities are\
\ often \ncharacterized by fusions between the same pair of genes. In order\
\ to boost \nsensitivity, a list of known fusions can be supplied using this\
\ parameter. The list \nmust contain two columns with the names of the fused\
\ genes, separated by tabs.\n"
info: null
example:
- "known_fusions.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--blacklist"
alternatives:
- "-b"
description: "File containing blacklisted events (recurrent artifacts and transcripts\
\ \nobserved in healthy tissue).\n"
info: null
example:
- "blacklist.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--structural_variants"
alternatives:
- "-d"
description: "Tab-separated file with coordinates of structural variants found\
\ using \nwhole-genome sequencing data. These coordinates serve to increase\
\ sensitivity \ntowards weakly expressed fusions and to eliminate fusions with\
\ low evidence. \n"
info: null
example:
- "structural_variants_from_WGS.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tags"
alternatives:
- "-t"
description: "Tab-separated file containing fusions to annotate with tags in the\
\ 'tags' column. \nThe first two columns specify the genes; the third column\
\ specifies the tag. The \nfile may be gzip-compressed. \n"
info: null
example:
- "tags.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--protein_domains"
alternatives:
- "-p"
description: "File in GFF3 format containing coordinates of the protein domains\
\ of genes. The\nprotein domains retained in a fusion are listed in the column\n\
'retained_protein_domains'. The file may be gzip-compressed.\n"
info: null
example:
- "protein_domains.gff3"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--fusions"
alternatives:
- "-o"
description: "Output file with fusions that have passed all filters.\n"
info: null
example:
- "fusions.tsv"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fusions_discarded"
alternatives:
- "-O"
description: "Output file with fusions that were discarded due to filtering. \n"
info: null
example:
- "fusions.discarded.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "long"
name: "--max_genomic_breakpoint_distance"
alternatives:
- "-D"
description: "When a file with genomic breakpoints obtained via \nwhole-genome\
\ sequencing is supplied via the --structural_variants\nparameter, this parameter\
\ determines how far a \ngenomic breakpoint may be away from a \ntranscriptomic\
\ breakpoint to consider it as a \nrelated event. For events inside genes, the\
\ \ndistance is added to the end of the gene; for \nintergenic events, the distance\
\ threshold is \napplied as is. Default: 100000.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
alternatives:
- "-s"
description: "Whether a strand-specific protocol was used for library preparation,\
\ \nand if so, the type of strandedness (auto/yes/no/reverse). When \nunstranded\
\ data is processed, the strand can sometimes be inferred from \nsplice-patterns.\
\ But in unclear situations, stranded data helps \nresolve ambiguities. Default:\
\ auto\n"
info: null
required: false
choices:
- "auto"
- "yes"
- "no"
- "reverse"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--interesting_contigs"
alternatives:
- "-i"
description: "List of interesting contigs. Fusions between genes \non other contigs\
\ are ignored. Contigs can be specified with or without the \nprefix \"chr\"\
. Asterisks (*) are treated as wild-cards. \nDefault: 1 2 3 4 5 6 7 8 9 10 11\
\ 12 13 14 15 16 17 18 19 20 21 22 X Y AC_* NC_*\n"
info: null
example:
- "1"
- "2"
- "AC_*"
- "NC_*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--viral_contigs"
alternatives:
- "-v"
description: "List of viral contigs. Asterisks (*) are treated as \nwild-cards.\n\
Default: AC_* NC_*\n"
info: null
example:
- "AC_*"
- "NC_*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--disable_filters"
alternatives:
- "-f"
description: "List of filters to disable. By default all filters are \nenabled.\
\ \n"
info: null
required: false
choices:
- "homologs"
- "low_entropy"
- "isoforms"
- "top_expressed_viral_contigs"
- "viral_contigs"
- "uninteresting_contigs"
- "non_coding_neighbors"
- "mismatches"
- "duplicates"
- "no_genomic_support"
- "genomic_support"
- "intronic"
- "end_to_end"
- "relative_support"
- "low_coverage_viral_contigs"
- "merge_adjacent"
- "mismappers"
- "multimappers"
- "same_gene"
- "long_gap"
- "internal_tandem_duplication"
- "small_insert_size"
- "read_through"
- "inconsistently_clipped"
- "intragenic_exonic"
- "marginal_read_through"
- "spliced"
- "hairpin"
- "blacklist"
- "min_support"
- "select_best"
- "in_vitro"
- "short_anchor"
- "known_fusions"
- "no_coverage"
- "homopolymer"
- "many_spliced"
direction: "input"
multiple: true
multiple_sep: ";"
- type: "double"
name: "--max_e_value"
alternatives:
- "-E"
description: "Arriba estimates the number of fusions with a given number of supporting\
\ \nreads which one would expect to see by random chance. If the expected number\
\ \nof fusions (e-value) is higher than this threshold, the fusion is \ndiscarded\
\ by the 'relative_support' filter. Note: Increasing this \nthreshold can dramatically\
\ increase the number of false positives and may \nincrease the runtime of resource-intensive\
\ steps. Fractional values are \npossible. Default: 0.300000 \n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_supporting_reads"
alternatives:
- "-S"
description: "The 'min_support' filter discards all fusions with fewer than \n\
this many supporting reads (split reads and discordant mates \ncombined). Default:\
\ 2 \n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_mismappers"
alternatives:
- "-m"
description: "When more than this fraction of supporting reads turns out to be\
\ \nmismappers, the 'mismappers' filter discards the fusion. Default: \n0.800000\n"
info: null
example:
- 0.8
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_homolog_identity"
alternatives:
- "-L"
description: "Genes with more than the given fraction of sequence identity are\
\ \nconsidered homologs and removed by the 'homologs' filter. \nDefault: 0.300000\
\ \n"
info: null
example:
- 0.3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--homopolymer_length"
alternatives:
- "-H"
description: "The 'homopolymer' filter removes breakpoints adjacent to \nhomopolymers\
\ of the given length or more. Default: 6\n"
info: null
example:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_through_distance"
alternatives:
- "-R"
description: "The 'read_through' filter removes read-through fusions \nwhere the\
\ breakpoints are less than the given distance away \nfrom each other. Default:\
\ 10000 \n"
info: null
example:
- 10000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_anchor_length"
alternatives:
- "-A"
description: "Alignment artifacts are often characterized by split reads coming\
\ \nfrom only one gene and no discordant mates. Moreover, the split \nreads\
\ only align to a short stretch in one of the genes. The \n'short_anchor' filter\
\ removes these fusions. This parameter sets \nthe threshold in bp for what\
\ the filter considers short. Default: 23 \n"
info: null
example:
- 23
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--many_spliced_events"
alternatives:
- "-M"
description: "The 'many_spliced' filter recovers fusions between genes that \n\
have at least this many spliced breakpoints. Default: 4\n"
info: null
example:
- 4
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_kmer_content"
alternatives:
- "-K"
description: "The 'low_entropy' filter removes reads with repetitive 3-mers. If\
\ \nthe 3-mers make up more than the given fraction of the sequence, then \n\
the read is discarded. Default: 0.600000 \n"
info: null
example:
- 0.6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_mismatch_pvalue"
alternatives:
- "-V"
description: "The 'mismatches' filter uses a binomial model to calculate a \n\
p-value for observing a given number of mismatches in a read. If \nthe number\
\ of mismatches is too high, the read is discarded. \nDefault: 0.010000 \n"
info: null
example:
- 0.05
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length"
alternatives:
- "-F"
description: "When paired-end data is given, the fragment length is estimated\
\ \nautomatically and this parameter has no effect. But when single-end \ndata\
\ is given, the mean fragment length should be specified to \neffectively filter\
\ fusions that arise from hairpin structures. \nDefault: 200 \n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_reads"
alternatives:
- "-U"
description: "Subsample fusions with more than the given number of supporting\
\ reads. This \nimproves performance without compromising sensitivity, as long\
\ as the \nthreshold is high. Counting of supporting reads beyond the threshold\
\ is \ninaccurate, obviously. Default: 300 \n"
info: null
example:
- 300
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--quantile"
alternatives:
- "-Q"
description: "Highly expressed genes are prone to produce artifacts during library\
\ \npreparation. Genes with an expression above the given quantile are eligible\
\ \nfor filtering by the 'in_vitro' filter. Default: 0.998000\n"
info: null
example:
- 0.998
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--exonic_fraction"
alternatives:
- "-e"
description: "The breakpoints of false-positive predictions of intragenic events\
\ \nare often both in exons. True predictions are more likely to have at \n\
least one breakpoint in an intron, because introns are larger. If the \nfraction\
\ of exonic sequence between two breakpoints is smaller than \nthe given fraction,\
\ the 'intragenic_exonic' filter discards the \nevent. Default: 0.330000 \n"
info: null
example:
- 0.33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--top_n"
alternatives:
- "-T"
description: "Only report viral integration sites of the top N most highly expressed\
\ viral \ncontigs. Default: 5\n"
info: null
example:
- 5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--covered_fraction"
alternatives:
- "-C"
description: "Ignore virally associated events if the virus is not fully \nexpressed,\
\ i.e., less than the given fraction of the viral contig is \ntranscribed. Default:\
\ 0.050000 \n"
info: null
example:
- 0.05
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_itd_length"
alternatives:
- "-l"
description: "Maximum length of internal tandem duplications. Note: Increasing\
\ \nthis value beyond the default can impair performance and lead to many \n\
false positives. Default: 100 \n"
info: null
example:
- 100
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--min_itd_allele_fraction"
alternatives:
- "-z"
description: "Required fraction of supporting reads to report an internal \ntandem\
\ duplication. Default: 0.070000 \n"
info: null
example:
- 0.07
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_itd_supporting_reads"
alternatives:
- "-Z"
description: "Required absolute number of supporting reads to report an \ninternal\
\ tandem duplication. Default: 10 \n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--skip_duplicate_marking"
alternatives:
- "-u"
description: "Instead of performing duplicate marking itself, Arriba relies on\
\ duplicate marking by a \npreceding program using the BAM_FDUP flag. This makes\
\ sense when unique molecular \nidentifiers (UMI) are used.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--extra_information"
alternatives:
- "-X"
description: "To reduce the runtime and file size, by default, the columns 'fusion_transcript',\
\ \n'peptide_sequence', and 'read_identifiers' are left empty in the file containing\
\ \ndiscarded fusion candidates (see parameter -O). When this flag is set, this\
\ extra \ninformation is reported in the discarded fusions file.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--fill_gaps"
alternatives:
- "-I"
description: "If assembly of the fusion transcript sequence from the supporting\
\ reads is incomplete \n(denoted as '...'), fill the gaps using the assembly\
\ sequence wherever possible. \n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Detect gene fusions from RNA-Seq data"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
cpus: 1
commands:
- "ps"
keywords:
- "Gene fusion"
- "RNA-Seq"
license: "MIT"
references:
doi:
- "10.1101/gr.257246.119"
links:
repository: "https://github.com/suhrig/arriba"
homepage: "https://arriba.readthedocs.io/en/latest/"
documentation: "https://arriba.readthedocs.io/en/latest/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/arriba:2.4.0--h0033a41_2"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\\s\\(.*\\)/arriba: \"\\\
1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/arriba/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/arriba"
executable: "target/executable/arriba/arriba"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

2340
target/executable/arriba/arriba Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,211 @@
name: "bcftools_sort"
namespace: "bcftools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input VCF/BCF file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output sorted VCF/BCF file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "string"
name: "--output_type"
alternatives:
- "-O"
description: "Compresses or uncompresses the output.\nThe options are:\n b: compressed\
\ BCF, \n u: uncompressed BCF, \n z: compressed VCF, \n v: uncompressed VCF.\
\ \n"
info: null
required: false
choices:
- "b"
- "u"
- "z"
- "v"
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Sorts VCF/BCF files.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Sort"
- "VCF"
- "BCF"
license: "MIT/Expat, GNU"
references:
doi:
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/bcftools"
homepage: "https://samtools.github.io/bcftools/"
documentation: "https://samtools.github.io/bcftools/bcftools.html#sort"
issue_tracker: "https://github.com/samtools/bcftools/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bcftools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bcftools: \\\"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftools\
\ //p')\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bcftools/bcftools_sort/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bcftools/bcftools_sort"
executable: "target/executable/bcftools/bcftools_sort/bcftools_sort"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,444 @@
name: "bcl_convert"
version: "v0.2"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
- name: "Dorien Roosen"
roles:
- "author"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--bcl_input_directory"
alternatives:
- "-i"
description: "Input run directory"
info: null
example:
- "bcl_dir"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_sheet"
alternatives:
- "-s"
description: "Path to SampleSheet.csv file (default searched for in --bcl_input_directory)"
info: null
example:
- "bcl_dir/sample_sheet.csv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--run_info"
description: "Path to RunInfo.xml file (default root of BCL input directory)"
info: null
example:
- "bcl_dir/RunInfo.xml"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Lane and tile settings"
arguments:
- type: "integer"
name: "--bcl_only_lane"
description: "Convert only specified lane number (default all lanes)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--first_tile_only"
description: "Only convert first tile of input (for testing & debugging)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tiles"
description: "Process only a subset of tiles by a regular expression"
info: null
example:
- "s_[0-9]+_1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--exclude_tiles"
description: "Exclude set of tiles by a regular expression"
info: null
example:
- "s_[0-9]+_1"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Resource arguments"
arguments:
- type: "boolean"
name: "--shared_thread_odirect_output"
description: "Use linux native asynchronous io (io_submit) for file output (Default=false)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_parallel_tiles"
description: "\\# of tiles to process in parallel (default 1)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_conversion_threads"
description: "\\# of threads for conversion (per tile, default # cpu threads)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_compression_threads"
description: "\\# of threads for fastq.gz output compression (per tile, default\
\ # cpu threads, or HW+12)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bcl_num_decompression_threads"
description: "\\# of threads for bcl/cbcl input decompression (per tile, default\
\ half # cpu threads, or HW+8). Only applies when preloading files"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Run arguments"
arguments:
- type: "boolean"
name: "--bcl_only_matched_reads"
description: "For pure BCL conversion, do not output files for 'Undetermined'\
\ [unmatched] reads (output by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--no_lane_splitting"
description: "Do not split FASTQ file by lane (false by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_unknown_barcodes_reported"
description: "\\# of Top Unknown Barcodes to output (1000 by default)"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bcl_validate_sample_sheet_only"
description: "Only validate RunInfo.xml & SampleSheet files (produce no FASTQ\
\ files)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--strict_mode"
description: "Abort if any files are missing (false by default)"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--sample_name_column_enabled"
description: "Use sample sheet 'Sample_Name' column when naming fastq files &\
\ subdirectories"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output_directory"
alternatives:
- "-o"
description: "Output directory containing fastq files"
info: null
example:
- "fastq_dir"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--bcl_sampleproject_subdirectories"
description: "Output to subdirectories based upon sample sheet 'Sample_Project'\
\ column"
info: null
example:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fastq_gzip_compression_level"
description: "Set fastq output compression level 0-9 (default 1)"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reports"
description: "Reports directory"
info: null
example:
- "reports_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--logs"
description: "Logs directory"
info: null
example:
- "logs_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Convert bcl files to fastq files using bcl-convert.\nInformation about\
\ upgrading from bcl2fastq via\n[Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html)\n\
and [BCL Convert Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html)\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "demultiplex"
- "fastq"
- "bcl"
- "illumina"
license: "Proprietary"
links:
repository: "https://github.com/viash-hub/biobox"
homepage: "https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html"
documentation: "https://support.illumina.com/downloads/bcl-convert-user-guide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:trixie-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "wget"
- "gdb"
- "which"
- "hostname"
- "alien"
- "procps"
interactive: false
- type: "docker"
run:
- "wget https://s3.amazonaws.com/webdata.illumina.com/downloads/software/bcl-convert/bcl-convert-4.2.7-2.el8.x86_64.rpm\
\ -O /tmp/bcl-convert.rpm && \\\nalien -i /tmp/bcl-convert.rpm && \\\nrm -rf\
\ /var/lib/apt/lists/* && \\\nrm /tmp/bcl-convert.rpm\n"
- type: "docker"
run:
- "echo \"bcl-convert: \\\"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/\
\ s/^bcl-convert\\ Version //p')\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bcl_convert/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bcl_convert"
executable: "target/executable/bcl_convert/bcl_convert"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,296 @@
name: "bd_rhapsody_make_reference"
namespace: "bd_rhapsody"
version: "v0.2"
authors:
- name: "Robrecht Cannoodt"
roles:
- "author"
- "maintainer"
info:
links:
email: "robrecht@data-intuitive.com"
github: "rcannood"
orcid: "0000-0003-3641-729X"
linkedin: "robrechtcannoodt"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Science Engineer"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Core Member"
- name: "Weiwei Schultz"
roles:
- "contributor"
info:
organizations:
- name: "Janssen R&D US"
role: "Associate Director Data Sciences"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--genome_fasta"
description: "Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody\
\ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse."
info:
config_key: "Genome_fasta"
example:
- "genome_sequence.fa.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "File path to the transcript annotation files in GTF or GTF.GZ format.\
\ The Sequence Analysis Pipeline requires the 'gene_name' or \n'gene_id' attribute\
\ to be set on each gene and exon feature. Gene and exon feature lines must\
\ have the same attribute, and exons\nmust have a corresponding gene with the\
\ same value. For TCR/BCR assays, the TCR or BCR gene segments must have the\
\ 'gene_type' or\n'gene_biotype' attribute set, and the value should begin with\
\ 'TR' or 'IG', respectively.\n"
info:
config_key: "Gtf"
example:
- "transcriptome_annotation.gtf.gz"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--extra_sequences"
description: "File path to additional sequences in FASTA format to use when building\
\ the STAR index. (e.g. transgenes or CRISPR guide barcodes).\nGTF lines for\
\ these sequences will be automatically generated and combined with the main\
\ GTF.\n"
info:
config_key: "Extra_sequences"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--reference_archive"
description: "A Compressed archive containing the Reference Genome Index and annotation\
\ GTF files. This archive is meant to be used as an\ninput in the BD Rhapsody\
\ Sequencing Analysis Pipeline.\n"
info: null
example:
- "star_index.tar.gz"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
# Argument: names of the mitochondrial contigs in the reference genome.
# NOTE(review): `info.config_key` is presumably forwarded by script.py as the
# key of the CWL job input -- confirm against the script. The CWL tool listed
# under `resources` (make_rhap_reference_2.2.1_nodocker.cwl) declares this
# input as `Mitochondrial_Contigs` (capital "C"); CWL input ids are
# case-sensitive, so the key below must match that spelling exactly or a
# user-supplied value would not reach the tool.
- type: "string"
  name: "--mitochondrial_contigs"
  description: "Names of the Mitochondrial contigs in the provided Reference Genome.\
    \ Fragments originating from contigs other than these are\nidentified as 'nuclear\
    \ fragments' in the ATACseq analysis pipeline.\n"
  info:
    config_key: "Mitochondrial_Contigs"
  # Same default list as the CWL tool's own default for this input.
  default:
    - "chrM"
    - "chrMT"
    - "M"
    - "MT"
  required: false
  direction: "input"
  multiple: true
  multiple_sep: ";"
- type: "boolean_true"
name: "--filtering_off"
description: "By default the input Transcript Annotation files are filtered based\
\ on the gene_type/gene_biotype attribute. Only features \nhaving the following\
\ attribute values are kept:\n\n - protein_coding\n - lncRNA (lincRNA and\
\ antisense for Gencode < v31/M22/Ensembl97)\n - IG_LV_gene\n - IG_V_gene\n\
\ - IG_V_pseudogene\n - IG_D_gene\n - IG_J_gene\n - IG_J_pseudogene\n -\
\ IG_C_gene\n - IG_C_pseudogene\n - TR_V_gene\n - TR_V_pseudogene\n - TR_D_gene\n\
\ - TR_J_gene\n - TR_J_pseudogene\n - TR_C_gene\n\n If you have already\
\ pre-filtered the input Annotation files and/or wish to turn-off the filtering,\
\ please set this option to True.\n"
info:
config_key: "Filtering_off"
direction: "input"
# Flag: build a WTA-only index instead of the combined WTA + ATAC index.
# NOTE(review): `info.config_key` is presumably forwarded by script.py as the
# key of the CWL job input -- confirm against the script. The CWL tool listed
# under `resources` (make_rhap_reference_2.2.1_nodocker.cwl) declares this
# input as `WTA_Only` (all-caps "WTA"); CWL input ids are case-sensitive, so
# the key below must match that spelling exactly or the flag would be ignored.
- type: "boolean_true"
  name: "--wta_only_index"
  description: "Build a WTA only index, otherwise builds a WTA + ATAC index."
  info:
    config_key: "WTA_Only"
  direction: "input"
- type: "string"
name: "--extra_star_params"
description: "Additional parameters to pass to STAR when building the genome index.\
\ Specify exactly like how you would on the command line."
info:
config_key: "Extra_STAR_params"
example:
- "--limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "make_rhap_reference_2.2.1_nodocker.cwl"
description: "The Reference Files Generator creates an archive containing Genome Index\n\
and Transcriptome annotation files needed for the BD Rhapsody Sequencing\nAnalysis\
\ Pipeline. The app takes as input one or more FASTA and GTF files\nand produces\
\ a compressed archive in the form of a tar.gz file. The \narchive contains:\n\n\
- STAR index\n- Filtered GTF file\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "genome"
- "reference"
- "index"
- "align"
license: "Unknown"
links:
repository: "https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/"
documentation: "https://bd-rhapsody-bioinfo-docs.genomics.bd.com/resources/extra_utilities.html#make-rhapsody-reference"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "bdgenomics/rhapsody:2.2.1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "cwlref-runner"
- "cwl-runner"
upgrade: true
- type: "docker"
run:
- "echo \"bdgenomics/rhapsody: 2.2.1\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bd_rhapsody/bd_rhapsody_make_reference"
executable: "target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,115 @@
requirements:
InlineJavascriptRequirement: {}
class: CommandLineTool
label: Reference Files Generator for BD Rhapsody™ Sequencing Analysis Pipeline
cwlVersion: v1.2
doc: >-
The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsody™ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n - STAR index\n - Filtered GTF file
baseCommand: run_reference_generator.sh
inputs:
Genome_fasta:
type: File[]
label: Reference Genome
doc: |-
Reference genome file in FASTA format. The BD Rhapsody™ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
inputBinding:
prefix: --reference-genome
shellQuote: false
Gtf:
type: File[]
label: Transcript Annotations
doc: |-
Transcript annotation files in GTF format. The BD Rhapsody™ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse.
inputBinding:
prefix: --gtf
shellQuote: false
Extra_sequences:
type: File[]?
label: Extra Sequences
doc: |-
Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome)
inputBinding:
prefix: --extra-sequences
shellQuote: false
Mitochondrial_Contigs:
type: string[]?
default: ["chrM", "chrMT", "M", "MT"]
label: Mitochondrial Contig Names
doc: |-
Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline.
inputBinding:
prefix: --mitochondrial-contigs
shellQuote: false
Filtering_off:
type: boolean?
label: Turn off filtering
doc: |-
By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are kept:
- protein_coding
- lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
- IG_LV_gene
- IG_V_gene
- IG_V_pseudogene
- IG_D_gene
- IG_J_gene
- IG_J_pseudogene
- IG_C_gene
- IG_C_pseudogene
- TR_V_gene
- TR_V_pseudogene
- TR_D_gene
- TR_J_gene
- TR_J_pseudogene
- TR_C_gene
If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
inputBinding:
prefix: --filtering-off
shellQuote: false
WTA_Only:
type: boolean?
label: WTA only index
doc: Build a WTA only index, otherwise builds a WTA + ATAC index.
inputBinding:
prefix: --wta-only-index
shellQuote: false
Archive_prefix:
type: string?
label: Archive Prefix
doc: |-
A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files.
inputBinding:
prefix: --archive-prefix
shellQuote: false
Extra_STAR_params:
type: string?
label: Extra STAR Params
doc: |-
Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
Example:
--limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
inputBinding:
prefix: --extra-star-params
shellQuote: true
Maximum_threads:
type: int?
label: Maximum Number of Threads
doc: |-
The maximum number of threads to use in the pipeline. By default, all available cores are used.
inputBinding:
prefix: --maximum-threads
shellQuote: false
outputs:
Archive:
type: File
doc: |-
A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsody™ Sequencing Analysis Pipeline.
id: Reference_Archive
label: Reference Files Archive
outputBinding:
glob: '*.tar.gz'

View File

@@ -0,0 +1,213 @@
name: "bedtools_bamtofastq"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input BAM file to be converted to FASTQ."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--fastq"
alternatives:
- "-fq"
description: "Output FASTQ file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq2"
alternatives:
- "-fq2"
description: "FASTQ for second end. Used if BAM contains paired-end data.\nBAM\
\ should be sorted by query name if creating paired FASTQ.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--tags"
description: "Create FASTQ based on the mate info in the BAM R2 and Q2 tags.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Conversion tool for extracting FASTQ records from sequence alignments\
\ in BAM format.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Conversion"
- "BAM"
- "FASTQ"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bamtofastq.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bamtofastq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bamtofastq"
executable: "target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,202 @@
name: "bedtools_bed12tobed6"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input BED12 file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output BED6 file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--n_score"
alternatives:
- "-n"
description: "Force the score to be the (1-based) block number from the BED12.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts BED features in BED12 (a.k.a. “blocked” BED features such as\
\ genes) to discrete BED6 features.\nFor example, in the case of a gene with six\
\ exons, bed12ToBed6 would create six separate BED6 features (i.e., one for each\
\ exon).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Converts"
- "BED12"
- "BED6"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bed12tobed6.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bed12tobed6/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bed12tobed6"
executable: "target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,240 @@
name: "bedtools_bedtobam"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf)."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Input genome file.\nNOTE: This is not a fasta file. This is a two-column\
\ tab-delimited file\nwhere the first column is the chromosome name and the\
\ second their sizes.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output BAM file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--map_quality"
alternatives:
- "-mapq"
description: "Set the mapping quality for the BAM records.\n"
info: null
default:
- 255
required: false
min: 0
max: 255
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bed12"
description: "The BED file is in BED12 format. The BAM CIGAR\nstring will reflect\
\ BED \"blocks\".\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--uncompress_bam"
alternatives:
- "-ubam"
description: "Write uncompressed BAM output. Default writes compressed BAM.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts feature records (bed/gff/vcf) to BAM format."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Converts"
- "BED"
- "GFF"
- "VCF"
- "BAM"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/bedtobam.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "samtools"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_bedtobam/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_bedtobam"
executable: "target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,258 @@
name: "bedtools_getfasta"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input arguments"
arguments:
# Reference FASTA file from which the BED intervals are extracted.
# NOTE(review): declared optional (`required: false`) although the description
# presents it as the sequence source for every interval — confirm whether this
# should be `required: true`.
- type: "file"
name: "--input_fasta"
description: "FASTA file containing sequences for each interval specified in the\
\ input BED file.\nThe headers in the input FASTA file must exactly match the\
\ chromosome column in the BED file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# BED file listing the intervals to pull out of the FASTA file.
# NOTE(review): declared optional (`required: false`) although the component
# description says sequences are extracted "for each of the intervals defined
# in a BED/GFF/VCF file" — confirm whether this should be `required: true`.
- type: "file"
name: "--input_bed"
description: "BED file containing intervals to extract from the FASTA file.\n\
BED files containing a single region require a newline character\nat the end\
\ of the line, otherwise a blank output file is produced.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--rna"
description: "The FASTA is RNA not DNA. Reverse complementation handled accordingly.\n"
info: null
direction: "input"
- name: "Run arguments"
arguments:
- type: "boolean_true"
name: "--strandedness"
alternatives:
- "-s"
description: "Force strandedness. If the feature occupies the antisense strand,\
\ the output sequence will\nbe reverse complemented. By default strandedness\
\ is not taken into account.\n"
info: null
direction: "input"
- name: "Output arguments"
arguments:
# Required output file that receives the result of 'bedtools getfasta'.
- type: "file"
  name: "--output"
  alternatives:
  - "-o"
  description: "Output file where the output from the 'bedtools getfasta' command\
    \ will\nbe written to.\n"
  info: null
  must_exist: true
  create_parent: true
  required: true
  direction: "output"
  multiple: false
  multiple_sep: ";"
# Flag: report sequences as tab-delimited text rather than FASTA.
- type: "boolean_true"
  name: "--tab"
  description: "Report extracted sequences in a tab-delimited format instead of in\
    \ FASTA format.\n"
  info: null
  direction: "input"
# Flag: report sequences as tab-delimited BED rather than FASTA.
- type: "boolean_true"
  name: "--bed_out"
  description: "Report extracted sequences in a tab-delimited BED format instead of\
    \ in FASTA format.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--name"
description: "Set the FASTA header for each extracted sequence to be the \"name\"\
\ and coordinate columns from the BED feature.\n"
info: null
direction: "input"
# Flag: use only the BED "name" column as the FASTA header.
- type: "boolean_true"
  name: "--name_only"
  description: "Set the FASTA header for each extracted sequence to be the \"name\"\
    \ column from the BED feature.\n"
  info: null
  direction: "input"
# Flag: emit one FASTA entry per BED12 block.
# The description refers to --input_bed, the BED argument this component
# actually declares (there is no --input argument here).
- type: "boolean_true"
  name: "--split"
  description: "When --input_bed is in BED12 format, create a separate fasta entry\
    \ for each block in a BED12 record,\nblocks being described in the 11th and 12th\
    \ columns of the BED.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--full_header"
description: "Use full fasta header. By default, only the word before the first\
\ space or tab is used.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Extract sequences from a FASTA file for each of the intervals defined\
\ in a BED/GFF/VCF file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "sequencing"
- "fasta"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines this component can run on: a Docker image built from
# debian:stable-slim, or the host system directly ("native").
engines:
- type: docker
  id: docker
  image: "debian:stable-slim"
  target_registry: images.viash-hub.com
  target_tag: v0.2
  namespace_separator: "/"
  setup:
  # Packages installed into the image through apt.
  - type: apt
    packages: [bedtools, procps]
    interactive: false
  # Write the installed bedtools version to /var/software_versions.txt.
  - type: docker
    run:
    - |
      echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: native
  id: native
build_info:
config: "src/bedtools/bedtools_getfasta/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_getfasta"
executable: "target/executable/bedtools/bedtools_getfasta/bedtools_getfasta"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,299 @@
name: "bedtools_groupby"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "The input BED file to be used.\n"
info: null
example:
- "input_a.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output groupby BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
# Comma-separated, 1-based list of columns that define a group.
# NOTE(review): the description advertises "Default: 1,2,3", yet the argument
# is `required: true` and declares no `default` — confirm which one is intended.
- type: "string"
name: "--groupby"
alternatives:
- "-g"
- "-grp"
description: "Specify the columns (1-based) for the grouping.\nThe columns must\
\ be comma separated.\n- Default: 1,2,3 \n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# 1-based column that the operation summarizes.
# NOTE(review): typed as a single `integer`, whereas the --operation
# description gives a multi-column example ("-c 5,4,6") — confirm whether
# several columns are meant to be supported through this argument.
- type: "integer"
name: "--column"
alternatives:
- "-c"
- "-opCols"
description: "Specify the column (1-based) that should be summarized.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# Optional operation applied to the summarized column; the description lists
# the accepted operation names.
- type: "string"
  name: "--operation"
  alternatives:
  - "-o"
  - "-ops"
  description: "Specify the operation that should be applied to opCol.\nValid operations:\n\
    \ sum, count, count_distinct, min, max,\n mean, median, mode, antimode,\n\
    \ stdev, sstdev (sample standard dev.),\n collapse (i.e., print a comma\
    \ separated list (duplicates allowed)), \n distinct (i.e., print a comma\
    \ separated list (NO duplicates allowed)), \n distinct_sort_num (as distinct,\
    \ but sorted numerically, ascending), \n distinct_sort_num_desc (as distinct,\
    \ but sorted numerically, descending), \n concat (i.e., merge values into\
    \ a single, non-delimited string), \n freqdesc (i.e., print desc. list of\
    \ values:freq)\n freqasc (i.e., print asc. list of values:freq)\n first\
    \ (i.e., print first value)\n last (i.e., print last value)\n\nDefault value:\
    \ sum \n\nIf there is only one column, but multiple operations, all operations\
    \ will be\napplied on that column. Likewise, if there is only one operation,\
    \ but\nmultiple columns, that operation will be applied to all columns.\nOtherwise,\
    \ the number of columns must match the number of operations,\nand will be\
    \ applied in respective order.\nE.g., \"-c 5,4,6 -o sum,mean,count\" will give\
    \ the sum of column 5,\nthe mean of column 4, and the count of column 6.\nThe\
    \ order of output columns will match the ordering given in the command.\n"
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--full"
description: "Print all columns from input file. The first line in the group is\
\ used.\nDefault: print only grouped columns.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--inheader"
description: "Input file has a header line - the first line will be ignored.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--outheader"
description: "Print header line in the output, detailing the column names. \n\
If the input file has headers (-inheader), the output file\nwill use the input's\
\ column names.\nIf the input file has no headers, the output file\nwill use\
\ \"col_1\", \"col_2\", etc. as the column names.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--header"
description: "same as '-inheader -outheader'."
info: null
direction: "input"
- type: "boolean_true"
name: "--ignorecase"
description: "Group values regardless of upper/lower case.\n"
info: null
direction: "input"
- type: "integer"
name: "--precision"
alternatives:
- "-prec"
description: "Sets the decimal precision for output. \n"
info: null
default:
- 5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--delimiter"
alternatives:
- "-delim"
description: "Specify a custom delimiter for the collapse operations.\n"
info: null
example:
- "|"
default:
- ","
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Summarizes a dataset column based upon common column groupings. \nAkin\
\ to the SQL \"group by\" command.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "groupby"
- "BED"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/groupby.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines this component can run on: a Docker image built from
# debian:stable-slim, or the host system directly ("native").
engines:
- type: docker
  id: docker
  image: "debian:stable-slim"
  target_registry: images.viash-hub.com
  target_tag: v0.2
  namespace_separator: "/"
  setup:
  # Packages installed into the image through apt.
  - type: apt
    packages: [bedtools, procps]
    interactive: false
  # Write the installed bedtools version to /var/software_versions.txt.
  - type: docker
    run:
    - |
      echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: native
  id: native
build_info:
config: "src/bedtools/bedtools_groupby/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_groupby"
executable: "target/executable/bedtools/bedtools_groupby/bedtools_groupby"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,436 @@
name: "bedtools_intersect"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input_a"
alternatives:
- "-a"
description: "The input file (BED/GFF/VCF/BAM) to be used as the -a file.\n"
info: null
example:
- "input_a.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_b"
alternatives:
- "-b"
description: "The input file(s) (BED/GFF/VCF/BAM) to be used as the -b file(s).\n"
info: null
example:
- "input_b.bed"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--write_a"
alternatives:
- "-wa"
description: "Write the original A entry for each overlap."
info: null
direction: "input"
- type: "boolean_true"
name: "--write_b"
alternatives:
- "-wb"
description: "Write the original B entry for each overlap. \nUseful for knowing\
\ _what_ A overlaps. Restricted by -f and -r.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--left_outer_join"
alternatives:
- "-loj"
description: "Perform a \"left outer join\". That is, for each feature in A report\
\ each overlap with B. \nIf no overlaps are found, report a NULL feature for\
\ B.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--write_overlap"
alternatives:
- "-wo"
description: "Write the original A and B entries plus the number of base pairs\
\ of overlap between the two features.\n- Overlaps restricted by -f and -r.\
\ \n Only A features with overlap are reported.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--write_overlap_plus"
alternatives:
- "-wao"
description: "Write the original A and B entries plus the number of base pairs\
\ of overlap between the two features.\n- Overlaps restricted by -f and -r.\
\ \n However, A features w/o overlap are also reported with a NULL B feature\
\ and overlap = 0.\n"
info: null
direction: "input"
# Flag mapped to bedtools' -u: report each A entry once when it has at least
# one overlap in B. The argument name is left unchanged so existing callers
# keep working, even though it reads like the opposite; entries WITHOUT
# overlaps are reported by --report_no_overlaps_A (-v).
- type: "boolean_true"
  name: "--report_A_if_no_overlap"
  alternatives:
  - "-u"
  description: "Write the original A entry _once_ if _any_ overlaps are found in\
    \ B. \n- In other words, just report the fact >=1 hit was found.\n- Overlaps\
    \ restricted by -f and -r. \n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--number_of_overlaps_A"
alternatives:
- "-c"
description: "For each entry in A, report the number of overlaps with B.\n- Reports\
\ 0 for A entries that have no overlap with B.\n- Overlaps restricted by -f\
\ and -r.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--report_no_overlaps_A"
alternatives:
- "-v"
description: "Only report those entries in A that have _no overlaps_ with B.\n\
- Similar to \"grep -v\" (an homage).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--uncompressed_bam"
alternatives:
- "-ubam"
description: "Write uncompressed BAM output. Default writes compressed BAM."
info: null
direction: "input"
# Flag: only report B hits that lie on the same strand as the A feature.
- type: "boolean_true"
  name: "--same_strand"
  alternatives:
  - "-s"
  description: "Require same strandedness. That is, only report hits in B\nthat\
    \ overlap A on the _same_ strand.\n- By default, overlaps are reported without\
    \ respect to strand.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--opposite_strand"
alternatives:
- "-S"
description: "Require different strandedness. That is, only report hits in B\n\
that overlap A on the _opposite_ strand.\n- By default, overlaps are reported\
\ without respect to strand.\n"
info: null
direction: "input"
- type: "double"
name: "--min_overlap_A"
alternatives:
- "-f"
description: "Minimum overlap required as a fraction of A.\n- Default is 1E-9\
\ (i.e., 1bp).\n- FLOAT (e.g. 0.50)\n"
info: null
example:
- 0.5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--min_overlap_B"
alternatives:
- "-F"
description: "Minimum overlap required as a fraction of B.\n- Default is 1E-9\
\ (i.e., 1bp).\n- FLOAT (e.g. 0.50)\n"
info: null
example:
- 0.5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--reciprocal_overlap"
alternatives:
- "-r"
description: "Require that the fraction overlap be reciprocal for A AND B.\n-\
\ In other words, if -f is 0.90 and -r is used, this requires\nthat B overlap\
\ 90% of A and A _also_ overlaps 90% of B.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--either_overlap"
alternatives:
- "-e"
description: "Require that the minimum fraction be satisfied for A OR B.\n- In\
\ other words, if -e is used with -f 0.90 and -F 0.10 this requires\nthat either\
\ 90% of A is covered OR 10% of B is covered.\nWithout -e, both fractions would\
\ have to be satisfied.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals."
info: null
direction: "input"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Provide a genome file to enforce consistent chromosome \nsort order\
\ across input files. Only applies when used \nwith -sorted option.\n"
info: null
example:
- "genome.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--nonamecheck"
description: "For sorted data, don't throw an error if the file \nhas different\
\ naming conventions for the same chromosome \n(e.g., \"chr1\" vs \"chr01\"\
).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--sorted"
description: "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input.\n"
info: null
direction: "input"
- type: "string"
name: "--names"
description: "When using multiple databases, provide an alias \nfor each that\
\ will appear instead of a fileId when \nalso printing the DB record.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--filenames"
description: "When using multiple databases, show each complete filename instead\
\ of a fileId when also printing the DB record."
info: null
direction: "input"
- type: "boolean_true"
name: "--sortout"
description: "When using multiple databases, sort the output DB hits for each\
\ record."
info: null
direction: "input"
- type: "boolean_true"
name: "--bed"
description: "If using BAM input, write output as BED."
info: null
direction: "input"
- type: "boolean_true"
name: "--header"
description: "Print the header from the A file prior to results."
info: null
direction: "input"
- type: "boolean_true"
name: "--no_buffer_output"
alternatives:
- "--nobuf"
description: "Disable buffered output. Using this option will cause each line\n\
of output to be printed as it is generated, rather than saved\nin a buffer.\
\ This will make printing large output files \nnoticeably slower, but can be\
\ useful in conjunction with\nother software tools and scripts that need to\
\ process one\nline of bedtools output at a time.\n"
info: null
direction: "input"
# Amount of memory to use for the input buffer.
# NOTE(review): typed as `integer`, but the description says K/M/G suffixes
# are supported; a value such as "4G" is not an integer — confirm whether the
# suffixes are actually usable through this argument.
- type: "integer"
name: "--io_buffer_size"
alternatives:
- "--iobuf"
description: "Specify amount of memory to use for input buffer.\nTakes an integer\
\ argument. Optional suffixes K/M/G supported.\nNote: currently has no effect\
\ with compressed files. \n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "bedtools intersect allows one to screen for overlaps between two sets\
\ of genomic features. \nMoreover, it allows one to have fine control as to how\
\ the intersections are reported. \nbedtools intersect works with both BED/GFF/VCF\
\ and BAM files as input.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "feature intersection"
- "BAM"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0, MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines this component can run on: a Docker image built from
# debian:stable-slim, or the host system directly ("native").
engines:
- type: docker
  id: docker
  image: "debian:stable-slim"
  target_registry: images.viash-hub.com
  target_tag: v0.2
  namespace_separator: "/"
  setup:
  # Packages installed into the image through apt.
  - type: apt
    packages: [bedtools, procps]
    interactive: false
  # Write the installed bedtools version to /var/software_versions.txt.
  - type: docker
    run:
    - |
      echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: native
  id: native
build_info:
config: "src/bedtools/bedtools_intersect/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_intersect"
executable: "target/executable/bedtools/bedtools_intersect/bedtools_intersect"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,236 @@
name: "bedtools_links"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf)."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output HTML file to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
description: "By default, the links created will point to human (hg18) UCSC browser.\n\
If you have a local mirror, you can override this behavior by supplying\nthe -base,\
\ -org, and -db options.\n\nFor example, if the URL of your local mirror for mouse\
\ MM9 is called: \nhttp://mymirror.myuniversity.edu, then you would use the following:\n\
--base_url http://mymirror.myuniversity.edu\n--organism mouse\n--database mm9\n"
arguments:
- type: "string"
name: "--base_url"
alternatives:
- "-base"
description: "The “basename” for the UCSC browser.\n"
info: null
default:
- "http://genome.ucsc.edu"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--organism"
alternatives:
- "-org"
description: "The organism (e.g. mouse, human). \n"
info: null
default:
- "human"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--database"
alternatives:
- "-db"
description: "The genome build. \n"
info: null
default:
- "hg18"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Creates an HTML file with links to an instance of the UCSC Genome Browser\
\ for all features / intervals in a file. \nThis is useful for cases when one wants\
\ to manually inspect through a large set of annotations or features.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Links"
- "BED"
- "GFF"
- "VCF"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/links.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines this component can run on: a Docker image built from
# debian:stable-slim, or the host system directly ("native").
engines:
- type: docker
  id: docker
  image: "debian:stable-slim"
  target_registry: images.viash-hub.com
  target_tag: v0.2
  namespace_separator: "/"
  setup:
  # Packages installed into the image through apt.
  - type: apt
    packages: [bedtools, procps]
    interactive: false
  # Write the installed bedtools version to /var/software_versions.txt.
  - type: docker
    run:
    - |
      echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: native
  id: native
build_info:
config: "src/bedtools/bedtools_links/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_links"
executable: "target/executable/bedtools/bedtools_links/bedtools_links"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,305 @@
name: "bedtools_merge"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (BED/GFF/VCF) to be merged."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Output merged file BED to be written."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--strand"
alternatives:
- "-s"
description: "Force strandedness. That is, only merge features\nthat are on the\
\ same strand.\n- By default, merging is done without respect to strand.\n"
info: null
direction: "input"
- type: "string"
name: "--specific_strand"
alternatives:
- "-S"
description: "Force merge for one specific strand only.\nFollow with + or - to\
\ force merge from only\nthe forward or reverse strand, respectively.\n- By\
\ default, merging is done without respect to strand.\n"
info: null
required: false
choices:
- "+"
- "-"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--distance"
alternatives:
- "-d"
description: "Maximum distance between features allowed for features\nto be merged.\n\
- Def. 0. That is, overlapping & book-ended features are merged.\n- (INTEGER)\n\
- Note: negative values enforce the number of b.p. required for overlap.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# -c accepts one or more column numbers. Declared as a multi-value integer with
# "," as separator so that a comma-delimited list such as "5,4,6" passes
# validation; a single value such as "5" is still accepted.
# NOTE(review): this file is a build artifact (see build_info.config); apply the
# same change to the source config and confirm script.sh forwards the value
# unchanged to `bedtools merge -c`.
- type: "integer"
name: "--columns"
alternatives:
- "-c"
description: "Specify columns from the input file to operate upon (see the\
\ --operation option).\nMultiple columns can be specified in a comma-delimited list.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ","
# -o selects the summary operation(s) applied to the column(s) chosen with -c.
# Kept as a plain string so a comma-delimited list ("sum,mean,count") is passed
# through as one value.
- type: "string"
name: "--operation"
alternatives:
- "-o"
description: "Specify the operation that should be applied to -c.\nValid operations:\n\
\ sum, min, max, absmin, absmax,\n mean, median, mode, antimode\n stdev,\
\ sstdev\n collapse (i.e., print a delimited list (duplicates allowed)),\
\ \n distinct (i.e., print a delimited list (NO duplicates allowed)), \n\
\ distinct_sort_num (as distinct, sorted numerically, ascending),\n distinct_sort_num_desc\
\ (as distinct, sorted numerically, descending),\n distinct_only (delimited\
\ list of only unique values),\n count\n count_distinct (i.e., a count\
\ of the unique values in the column), \n first (i.e., just the first value\
\ in the column), \n last (i.e., just the last value in the column), \nDefault:\
\ sum\nMultiple operations can be specified in a comma-delimited list.\n\nIf\
\ there is only one column, but multiple operations, all operations will be\napplied\
\ on that column. Likewise, if there is only one operation, but\nmultiple columns,\
\ that operation will be applied to all columns.\nOtherwise, the number of columns\
\ must match the number of operations,\nand will be applied in respective\
\ order.\nE.g., \"-c 5,4,6 -o sum,mean,count\" will give the sum of column 5,\n\
the mean of column 4, and the count of column 6.\nThe order of output columns\
\ will match the ordering given in the command.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--delimiter"
alternatives:
- "-delim"
description: "Specify a custom delimiter for the collapse operations.\n"
info: null
example:
- "|"
default:
- ","
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--precision"
alternatives:
- "-prec"
description: "Sets the decimal precision for output (Default: 5).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bed"
description: "If using BAM input, write output as BED.\n"
info: null
direction: "input"
# Flag: emit the input file's header before the merged records.
# This component has a single --input, so the description says "input file"
# rather than "A file".
- type: "boolean_true"
name: "--header"
description: "Print the header from the input file prior to results.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_buffer"
alternatives:
- "-nobuf"
description: "Disable buffered output. Using this option will cause each line\n\
of output to be printed as it is generated, rather than saved\nin a buffer.\
\ This will make printing large output files \nnoticeably slower, but can be\
\ useful in conjunction with\nother software tools and scripts that need to\
\ process one\nline of bedtools output at a time.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Merges overlapping BED/GFF/VCF entries into a single interval.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/merge.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines for this component: a Docker image built from
# debian:stable-slim, plus a native engine.
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
# Install bedtools and procps via apt (procps presumably provides the `ps`
# command listed under requirements.commands -- confirm).
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
# Write a line of the form `bedtools: "<version>"` to
# /var/software_versions.txt; sed strips the leading "bedtools " from the
# `bedtools --version` output.
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
# Native engine entry; no setup steps are declared for it here.
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_merge/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_merge"
executable: "target/executable/bedtools/bedtools_merge/bedtools_merge"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,248 @@
name: "bedtools_sort"
namespace: "bedtools"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input file (bed/gff/vcf) to be sorted."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output sorted file (bed/gff/vcf) to be written."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--sizeA"
description: "Sort by feature size in ascending order."
info: null
direction: "input"
- type: "boolean_true"
name: "--sizeD"
description: "Sort by feature size in descending order."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenSizeA"
description: "Sort by chrom (asc), then feature size (asc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenSizeD"
description: "Sort by chrom (asc), then feature size (desc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenScoreA"
description: "Sort by chrom (asc), then score (asc)."
info: null
direction: "input"
- type: "boolean_true"
name: "--chrThenScoreD"
description: "Sort by chrom (asc), then score (desc)."
info: null
direction: "input"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Sort according to the chromosomes declared in \"genome.txt\""
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--faidx"
description: "Sort according to the chromosomes declared in \"names.txt\""
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Flag: emit the input file's header before the sorted records.
# This component has a single --input, so the description says "input file"
# rather than "A file".
- type: "boolean_true"
name: "--header"
description: "Print the header from the input file prior to results."
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Sorts a feature file (bed/gff/vcf) by chromosome and other criteria."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "sort"
- "BED"
- "GFF"
- "VCF"
license: "GPL-2.0, MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/sort.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/bedtools/bedtools_sort/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/bedtools/bedtools_sort"
executable: "target/executable/bedtools/bedtools_sort/bedtools_sort"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,184 @@
name: "busco_download_datasets"
namespace: "busco"
version: "v0.2"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "string"
name: "--download"
description: "Download dataset. Possible values are a specific dataset name, \"\
all\", \"prokaryota\", \"eukaryota\", or \"virus\".\nThe full list of available\
\ datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/)\
\ or by running the busco/busco_list_datasets component.\n"
info: null
example:
- "stramenopiles_odb10"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--download_path"
description: "Local filepath for storing BUSCO dataset downloads\n"
info: null
example:
- "busco_downloads"
default:
- "busco_downloads"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Downloads available busco datasets"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "lineage datasets"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines for this component: a Docker image based on the BUSCO
# 5.7.1 biocontainer, plus a native engine.
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
# Rewrite the `busco --version` output from "BUSCO <version>" to
# `busco: "<version>"` and store it in /var/software_versions.txt.
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
# Native engine entry; no setup steps are declared for it here.
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_download_datasets/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_download_datasets"
executable: "target/executable/busco/busco_download_datasets/busco_download_datasets"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,171 @@
name: "busco_list_datasets"
namespace: "busco"
version: "v0.2"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file of the available busco datasets\n"
info: null
example:
- "file.txt"
default:
- "busco_dataset_list.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Lists the available busco datasets"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "lineage datasets"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_list_datasets/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_list_datasets"
executable: "target/executable/busco/busco_list_datasets/busco_list_datasets"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,449 @@
name: "busco_run"
namespace: "busco"
version: "v0.2"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Input sequence file in FASTA format. Can be an assembled genome\
\ or transcriptome (DNA), or protein sequences from an annotated gene set. Also\
\ possible to use a path to a directory containing multiple input files.\n"
info: null
example:
- "file.fasta"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mode"
alternatives:
- "-m"
description: "Specify which BUSCO analysis mode to run. There are three valid\
\ modes:\n - geno or genome, for genome assemblies (DNA)\n - tran or transcriptome,\
\ for transcriptome assemblies (DNA)\n - prot or proteins, for annotated gene\
\ sets (protein)\n"
info: null
example:
- "proteins"
required: true
choices:
- "genome"
- "geno"
- "transcriptome"
- "tran"
- "proteins"
- "prot"
direction: "input"
multiple: false
multiple_sep: ";"
# Optional lineage dataset name (or local path) used for the assessment.
# The description points at the sibling components by their actual names:
# busco/busco_list_datasets and busco/busco_download_datasets.
- type: "string"
name: "--lineage_dataset"
alternatives:
- "-l"
description: "Specify a BUSCO lineage dataset that is most closely related to\
\ the assembly or gene set being assessed. \nThe full list of available datasets\
\ can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by\
\ running the busco/busco_list_datasets component.\nWhen unsure, the \"--auto_lineage\"\
\ flag can be set to automatically find the optimal lineage path.\nBUSCO will\
\ automatically download the requested dataset if it is not already present\
\ in the download folder. \nYou can optionally provide a path to a local dataset\
\ instead of a name, e.g. path/to/dataset.\nDatasets can be downloaded using\
\ the busco/busco_download_datasets component.\n"
info: null
example:
- "stramenopiles_odb10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--short_summary_json"
description: "Output file for short summary in JSON format.\n"
info: null
example:
- "short_summary.json"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--short_summary_txt"
description: "Output file for short summary in TXT format.\n"
info: null
example:
- "short_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--full_table"
description: "Full table output in TSV format.\n"
info: null
example:
- "full_table.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--missing_busco_list"
description: "Missing list output in TSV format.\n"
info: null
example:
- "missing_busco_list.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_dir"
description: "The full output directory, if so desired.\n"
info: null
example:
- "output_dir"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Resource and Run Settings"
arguments:
- type: "boolean_true"
name: "--force"
description: "Force rewriting of existing files. Must be used when output files\
\ with the provided name already exist.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Disable the info logs, displays only errors.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--restart"
alternatives:
- "-r"
description: "Continue a run that had already partially completed. Restarting\
\ skips calls to tools that have completed but performs all pre- and post-processing\
\ steps.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--tar"
description: "Compress some subdirectories with many files to save space.\n"
info: null
direction: "input"
- name: "Lineage Dataset Settings"
arguments:
# Flag: let BUSCO pick the lineage dataset automatically instead of requiring
# --lineage_dataset.
- type: "boolean_true"
name: "--auto_lineage"
description: "Run auto-lineage pipeline to automatically determine BUSCO lineage\
\ dataset that is most closely related to the assembly or gene set being assessed.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--auto_lineage_euk"
description: "Run auto-placement just on eukaryota tree to find optimal lineage\
\ path.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--auto_lineage_prok"
description: "Run auto_lineage just on prokaryota trees to find optimum lineage\
\ path.\n"
info: null
direction: "input"
- type: "string"
name: "--datasets_version"
description: "Specify the version of BUSCO datasets\n"
info: null
example:
- "odb10"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Augustus Settings"
arguments:
- type: "boolean_true"
name: "--augustus"
description: "Use augustus gene predictor for eukaryote runs.\n"
info: null
direction: "input"
# Extra Augustus options, given as one comma-separated string without
# whitespace (see the example value).
- type: "string"
name: "--augustus_parameters"
description: "Additional parameters to be passed to Augustus (see Augustus documentation:\
\ https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md).\n\
Parameters should be contained within a single string, without whitespace and\
\ separated by commas.\n"
info: null
example:
- "--PARAM1=VALUE1,--PARAM2=VALUE2"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--augustus_species"
description: "Specify the augustus species\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--long"
description: "Optimize Augustus self-training mode. This adds considerably to\
\ the run time, but can improve results for some non-model organisms.\n"
info: null
direction: "input"
- name: "BBTools Settings"
arguments:
- type: "integer"
name: "--contig_break"
description: "Number of contiguous Ns to signify a break between contigs in BBTools\
\ analysis.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--limit"
description: "Number of candidate regions (contig or transcript) from the BLAST\
\ output to consider per BUSCO.\nThis option is only effective in pipelines\
\ using BLAST, i.e. the genome pipeline (see --augustus) or the prokaryota transcriptome\
\ pipeline.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--scaffold_composition"
description: "Writes ACGTN content per scaffold to a file scaffold_composition.txt.\n"
info: null
direction: "input"
- name: "BLAST Settings"
arguments:
- type: "double"
name: "--e_value"
description: "E-value cutoff for BLAST searches.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Protein Gene Prediction settings"
arguments:
- type: "boolean_true"
name: "--miniprot"
description: "Use Miniprot gene predictor.\n"
info: null
direction: "input"
- name: "MetaEuk Settings"
arguments:
- type: "boolean_true"
name: "--metaeuk"
description: "Use Metaeuk gene predictor.\n"
info: null
direction: "input"
- type: "string"
name: "--metaeuk_parameters"
description: "Pass additional arguments to Metaeuk for the first run (see Metaeuk\
\ documentation https://github.com/soedinglab/metaeuk).\nAll parameters should\
\ be contained within a single string with no white space, with each parameter\
\ separated by a comma.\n"
info: null
example:
- "--max-overlap=15,--min-exon-aa=15"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--metaeuk_rerun_parameters"
description: "Pass additional arguments to Metaeuk for the second run (see Metaeuk\
\ documentation https://github.com/soedinglab/metaeuk).\nAll parameters should\
\ be contained within a single string with no white space, with each parameter\
\ separated by a comma.\n"
info: null
example:
- "--max-overlap=15,--min-exon-aa=15"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Assessment of genome assembly and annotation completeness with single\
\ copy orthologs"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Genome assembly"
- "quality control"
license: "MIT"
references:
doi:
- "10.1007/978-1-4939-9173-0_14"
links:
repository: "https://gitlab.com/ezlab/busco"
homepage: "https://busco.ezlab.org/"
documentation: "https://busco.ezlab.org/busco_userguide.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "busco --version | sed 's/BUSCO\\s\\(.*\\)/busco: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/busco/busco_run/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/busco/busco_run"
executable: "target/executable/busco/busco_run/busco_run"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,766 @@
name: "cutadapt"
version: "v0.2"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
argument_groups:
- name: "Specify Adapters for R1"
arguments:
- type: "string"
name: "--adapter"
alternatives:
- "-a"
description: "Sequence of an adapter ligated to the 3' end (paired data:\nof the\
\ first read). The adapter and subsequent bases are\ntrimmed. If a '$' character\
\ is appended ('anchoring'), the\nadapter is only found if it is a suffix of\
\ the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--front"
alternatives:
- "-g"
description: "Sequence of an adapter ligated to the 5' end (paired data:\nof the\
\ first read). The adapter and any preceding bases\nare trimmed. Partial matches\
\ at the 5' end are allowed. If\na '^' character is prepended ('anchoring'),\
\ the adapter is\nonly found if it is a prefix of the read.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--anywhere"
alternatives:
- "-b"
description: "Sequence of an adapter that may be ligated to the 5' or 3'\nend\
\ (paired data: of the first read). Both types of\nmatches as described under\
\ -a and -g are allowed. If the\nfirst base of the read is part of the match,\
\ the behavior\nis as with -g, otherwise as with -a. This option is mostly\n\
for rescuing failed library preparations - do not use if\nyou know which end\
\ your adapter was ligated to!\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
# FASTA-file variants of the R1 adapter options (3' / 5' / either end).
# NOTE(review): --adapter_fasta accepts multiple files while --front_fasta and
# --anywhere_fasta accept exactly one; confirm against script.sh whether this
# asymmetry is intentional.
- name: "Specify Adapters using Fasta files for R1"
  arguments:
    - name: "--adapter_fasta"
      type: "file"
      description: |
        Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
        of the first read). The adapter and subsequent bases are
        trimmed. If a '$' character is appended ('anchoring'), the
        adapter is only found if it is a suffix of the read.
      info: null
      direction: "input"
      required: false
      must_exist: true
      create_parent: true
      multiple: true
      multiple_sep: ";"
    - name: "--front_fasta"
      type: "file"
      description: |
        Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
        of the first read). The adapter and any preceding bases
        are trimmed. Partial matches at the 5' end are allowed. If
        a '^' character is prepended ('anchoring'), the adapter is
        only found if it is a prefix of the read.
      info: null
      direction: "input"
      required: false
      must_exist: true
      create_parent: true
      multiple: false
      multiple_sep: ";"
    - name: "--anywhere_fasta"
      type: "file"
      description: |
        Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
        end (paired data: of the first read). Both types of
        matches as described under -a and -g are allowed. If the
        first base of the read is part of the match, the behavior
        is as with -g, otherwise as with -a. This option is mostly
        for rescuing failed library preparations - do not use if
        you know which end your adapter was ligated to!
      info: null
      direction: "input"
      required: false
      must_exist: true
      create_parent: true
      multiple: false
      multiple_sep: ";"
# Adapter sequences for the second read (R2) of paired-end data.
# The help texts mirror the R1 options but refer to the second read and to
# the upper-case short options (-A/-G/-B) declared as alternatives below.
- name: "Specify Adapters for R2"
  arguments:
    - type: "string"
      name: "--adapter_r2"
      alternatives:
        - "-A"
      description: |
        Sequence of an adapter ligated to the 3' end of the second read
        (paired data only). The adapter and subsequent bases are
        trimmed. If a '$' character is appended ('anchoring'), the
        adapter is only found if it is a suffix of the read.
      info: null
      required: false
      direction: "input"
      multiple: true
      multiple_sep: ";"
    - type: "string"
      name: "--front_r2"
      alternatives:
        - "-G"
      description: |
        Sequence of an adapter ligated to the 5' end of the second read
        (paired data only). The adapter and any preceding bases
        are trimmed. Partial matches at the 5' end are allowed. If
        a '^' character is prepended ('anchoring'), the adapter is
        only found if it is a prefix of the read.
      info: null
      required: false
      direction: "input"
      multiple: true
      multiple_sep: ";"
    - type: "string"
      name: "--anywhere_r2"
      alternatives:
        - "-B"
      description: |
        Sequence of an adapter that may be ligated to the 5' or 3'
        end of the second read (paired data only). Both types of
        matches as described under -A and -G are allowed. If the
        first base of the read is part of the match, the behavior
        is as with -G, otherwise as with -A. This option is mostly
        for rescuing failed library preparations - do not use if
        you know which end your adapter was ligated to!
      info: null
      required: false
      direction: "input"
      multiple: true
      multiple_sep: ";"
# FASTA-file variants of the R2 adapter options. Each takes a single existing
# input file; the help texts refer to the second read of a pair.
- name: "Specify Adapters using Fasta files for R2"
  arguments:
    - type: "file"
      name: "--adapter_r2_fasta"
      description: |
        Fasta file containing sequences of an adapter ligated to the 3' end
        of the second read (paired data only). The adapter and subsequent bases are
        trimmed. If a '$' character is appended ('anchoring'), the
        adapter is only found if it is a suffix of the read.
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "input"
      multiple: false
      multiple_sep: ";"
    - type: "file"
      name: "--front_r2_fasta"
      description: |
        Fasta file containing sequences of an adapter ligated to the 5' end
        of the second read (paired data only). The adapter and any preceding bases
        are trimmed. Partial matches at the 5' end are allowed. If
        a '^' character is prepended ('anchoring'), the adapter is
        only found if it is a prefix of the read.
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "input"
      multiple: false
      multiple_sep: ";"
    - type: "file"
      name: "--anywhere_r2_fasta"
      description: |
        Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
        end of the second read (paired data only). Both types of
        matches as described under -A and -G are allowed. If the
        first base of the read is part of the match, the behavior
        is as with -G, otherwise as with -A. This option is mostly
        for rescuing failed library preparations - do not use if
        you know which end your adapter was ligated to!
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "input"
      multiple: false
      multiple_sep: ";"
# Options that only apply to paired-end input.
- name: "Paired-end options"
  arguments:
    # Flag: adapters are handled as pairs.
    - name: "--pair_adapters"
      type: "boolean_true"
      description: |
        Treat adapters given with -a/-A etc. as pairs. Either both
        or none are removed from each read pair.
      info: null
      direction: "input"
    # Single value restricted to the three choices listed below.
    - name: "--pair_filter"
      type: "string"
      description: |
        Which of the reads in a paired-end read have to match the
        filtering criterion in order for the pair to be filtered.
      info: null
      direction: "input"
      required: false
      choices:
        - "any"
        - "both"
        - "first"
      multiple: false
      multiple_sep: ";"
    # Flag: interleaved paired-end input and/or output.
    - name: "--interleaved"
      type: "boolean_true"
      description: |
        Read and/or write interleaved paired-end reads.
      info: null
      direction: "input"
- name: "Input parameters"
arguments:
- type: "file"
name: "--input"
description: "Input fastq file for single-end reads or R1 for paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Input fastq file for R2 in the case of paired-end reads.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--error_rate"
alternatives:
- "-E"
- "--errors"
description: "Maximum allowed error rate (if 0 <= E < 1), or absolute\nnumber\
\ of errors for full-length adapter match (if E is an\ninteger >= 1). Error\
\ rate = no. of errors divided by\nlength of matching region. Default: 0.1 (10%).\n"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_false"
name: "--no_indels"
description: "Allow only mismatches in alignments.\n"
info: null
direction: "input"
- type: "integer"
name: "--times"
alternatives:
- "-n"
description: "Remove up to COUNT adapters from each read. Default: 1.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--overlap"
alternatives:
- "-O"
description: "Require MINLENGTH overlap between read and adapter for an\nadapter\
\ to be found. The default is 3.\n"
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--match_read_wildcards"
description: "Interpret IUPAC wildcards in reads.\n"
info: null
direction: "input"
- type: "boolean_false"
name: "--no_match_adapter_wildcards"
description: "Do not interpret IUPAC wildcards in adapters.\n"
info: null
direction: "input"
- type: "string"
name: "--action"
description: "What to do if a match was found. trim: trim adapter and\nup- or\
\ downstream sequence; retain: trim, but retain\nadapter; mask: replace with\
\ 'N' characters; lowercase:\nconvert to lowercase; none: leave unchanged.\n\
The default is trim.\n"
info: null
example:
- "trim"
required: false
choices:
- "trim"
- "retain"
- "mask"
- "lowercase"
- "none"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--revcomp"
alternatives:
- "--rc"
description: "Check both the read and its reverse complement for adapter\nmatches.\
\ If match is on reverse-complemented version,\noutput that one.\n"
info: null
direction: "input"
- name: "Demultiplexing options"
arguments:
- type: "string"
name: "--demultiplex_mode"
description: "Enable demultiplexing and set the mode for it.\nWith mode 'unique_dual',\
\ adapters from the first and second read are used,\nand the indexes from the\
\ reads are only used in pairs. This implies\n--pair_adapters.\nEnabling mode\
\ 'combinatorial_dual' allows all combinations of the sets of indexes\non R1\
\ and R2. It is necessary to write each read pair to an output\nfile depending\
\ on the adapters found on both R1 and R2.\nMode 'single', uses indexes or barcodes\
\ located at the 5'\nend of the R1 read (single). \n"
info: null
required: false
choices:
- "single"
- "unique_dual"
- "combinatorial_dual"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Read modifications"
arguments:
- type: "integer"
name: "--cut"
alternatives:
- "-u"
description: "Remove LEN bases from each read (or R1 if paired; use --cut_r2\n\
option for R2). If LEN is positive, remove bases from the\nbeginning. If LEN\
\ is negative, remove bases from the end.\nCan be used twice if LENs have different\
\ signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "integer"
name: "--cut_r2"
description: "Remove LEN bases from each read (for R2). If LEN is positive, remove\
\ bases from the\nbeginning. If LEN is negative, remove bases from the end.\n\
Can be used twice if LENs have different signs. Applied\n*before* adapter trimming.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--nextseq_trim"
description: "NextSeq-specific quality trimming (each read). Trims also\ndark\
\ cycles appearing as high-quality G bases.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff"
alternatives:
- "-q"
description: "Trim low-quality bases from 5' and/or 3' ends of each read\nbefore\
\ adapter removal. Applied to both reads if data is\npaired. If one value is\
\ given, only the 3' end is trimmed.\nIf two comma-separated cutoffs are given,\
\ the 5' end is\ntrimmed with the first cutoff, the 3' end with the second.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_cutoff_r2"
alternatives:
- "-Q"
description: "Quality-trimming cutoff for R2. Default: same as for R1\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--quality_base"
description: "Assume that quality values in FASTQ are encoded as\nascii(quality\
\ + N). This needs to be set to 64 for some\nold Illumina FASTQ files. The default\
\ is 33.\n"
info: null
example:
- 33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--poly_a"
description: "Trim poly-A tails"
info: null
direction: "input"
- type: "integer"
name: "--length"
alternatives:
- "-l"
description: "Shorten reads to LENGTH. Positive values remove bases at\nthe end\
\ while negative ones remove bases at the beginning.\nThis and the following\
\ modifications are applied after\nadapter trimming.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--trim_n"
description: "Trim N's on ends of reads."
info: null
direction: "input"
- type: "string"
name: "--length_tag"
description: "Search for TAG followed by a decimal number in the\ndescription\
\ field of the read. Replace the decimal number\nwith the correct length of\
\ the trimmed read. For example,\nuse --length-tag 'length=' to correct fields\
\ like\n'length=123'.\n"
info: null
example:
- "length="
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strip_suffix"
description: "Remove this suffix from read names if present. Can be\ngiven multiple\
\ times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--prefix"
alternatives:
- "-x"
description: "Add this prefix to read names. Use {name} to insert the\nname of\
\ the matching adapter.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--suffix"
alternatives:
- "-y"
description: "Add this suffix to read names; can also include {name}\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--rename"
description: "Rename reads using TEMPLATE containing variables such as\n{id},\
\ {adapter_name} etc. (see documentation)\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--zero_cap"
alternatives:
- "-z"
description: "Change negative quality values to zero."
info: null
direction: "input"
- name: "Filtering of processed reads"
description: "Filters are applied after above read modifications. Paired-end reads\
\ are\nalways discarded pairwise (see also --pair_filter).\n"
arguments:
- type: "string"
name: "--minimum_length"
alternatives:
- "-m"
description: "Discard reads shorter than LEN. Default is 0.\nWhen trimming paired-end\
\ reads, the minimum lengths for R1 and R2 can be specified separately by separating\
\ them with a colon (:).\nIf the colon syntax is not used, the same minimum\
\ length applies to both reads, as discussed above.\nAlso, one of the values\
\ can be omitted to impose no restrictions.\nFor example, with -m 17:, the length\
\ of R1 must be at least 17, but the length of R2 is ignored.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--maximum_length"
alternatives:
- "-M"
description: "Discard reads longer than LEN. Default: no limit.\nFor paired reads,\
\ see the remark for --minimum_length\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--max_n"
description: "Discard reads with more than COUNT 'N' bases. If COUNT is\na number\
\ between 0 and 1, it is interpreted as a fraction\nof the read length.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Both thresholds are real-valued (an expected error count and a per-base
# error rate), so they are typed "double"; an integer-only type would reject
# fractional values such as 0.5. Integer inputs remain valid.
- type: "double"
  name: "--max_expected_errors"
  alternatives:
    - "--max_ee"
  description: |
    Discard reads whose expected number of errors (computed
    from quality values) exceeds ERRORS.
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "double"
  name: "--max_average_error_rate"
  alternatives:
    - "--max_aer"
  description: |
    as --max_expected_errors (see above), but divided by
    length to account for reads of varying length.
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--discard_trimmed"
alternatives:
- "--discard"
description: "Discard reads that contain an adapter. Use also -O to\navoid discarding\
\ too many randomly matching reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_untrimmed"
alternatives:
- "--trimmed_only"
description: "Discard reads that do not contain an adapter.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--discard_casava"
description: "Discard reads that did not pass CASAVA filtering (header\nhas :Y:).\n"
info: null
direction: "input"
- name: "Output parameters"
arguments:
- type: "string"
name: "--report"
description: "Which type of report to print: 'full' (default) or 'minimal'.\n"
info: null
example:
- "full"
required: false
choices:
- "full"
- "minimal"
direction: "input"
multiple: false
multiple_sep: ";"
# Boolean flag: it enables the JSON report but does not accept a path.
# NOTE(review): where the JSON report is written is decided by script.sh,
# which is not visible here - confirm and document the location.
- type: "boolean_true"
  name: "--json"
  description: "Write a report in JSON format. This is a flag; it does not take a file name.\n"
  info: null
  direction: "input"
- type: "file"
name: "--output"
description: "Glob pattern for matching the expected output files.\nShould include\
\ `$output_dir`.\n"
info: null
example:
- "fastq/*_001.fast[a,q]"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--fasta"
description: "Output FASTA to standard output even on FASTQ input.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--info_file"
description: "Write information about each read and its adapter matches\ninto\
\ info.txt in the output directory.\nSee the documentation for the file format.\n"
info: null
direction: "input"
# Single-flag group for troubleshooting.
- name: "Debug"
  arguments:
    # Flag (boolean_true): present means debug output is requested.
    - name: "--debug"
      type: "boolean_true"
      direction: "input"
      info: null
      description: "Print debug information"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "RNA-seq"
- "scRNA-seq"
- "high-throughput"
license: "MIT"
references:
doi:
- "10.14806/ej.17.1.200"
links:
repository: "https://github.com/marcelm/cutadapt"
homepage: "https://cutadapt.readthedocs.io"
documentation: "https://cutadapt.readthedocs.io"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "python"
user: false
pip:
- "cutadapt"
upgrade: true
- type: "docker"
run:
- "cutadapt --version | sed 's/\\(.*\\)/cutadapt: \"\\1\"/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/cutadapt/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/cutadapt"
executable: "target/executable/cutadapt/cutadapt"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,343 @@
name: "falco"
version: "v0.2"
authors:
- name: "Toni Verbeiren"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input"
description: "input fastq files"
info: null
example:
- "input1.fastq;input2.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Run arguments"
arguments:
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in \nthe read. WARNING: When using this option, \nyour\
\ plots may end up a ridiculous size. You \nhave been warned!\n"
info: null
direction: "input"
# The primary option name is misspelled ("contaminents"). It is kept unchanged
# so existing callers keep working; the correctly spelled form is offered as
# an alternative.
- type: "file"
  name: "--contaminents"
  alternatives:
    - "--contaminants"
  description: "Specifies a non-default file which contains \nthe list of contaminants\
    \ to screen \noverrepresented sequences against. The file \nmust contain sets\
    \ of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith\
    \ a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n"
  info: null
  must_exist: true
  create_parent: true
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
# Optional single input file with named adapter sequences (name[tab]sequence).
- type: "file"
  name: "--adapters"
  description: "Specifies a non-default file which contains \nthe list of adapter\
    \ sequences which will be \nexplicitly searched against the library. The \nfile\
    \ must contain sets of named adapters in \nthe form name[tab]sequence. Lines\
    \ prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n"
  info: null
  must_exist: true
  create_parent: true
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "file"
name: "--limits"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to \ndetermine the warn/error limits for the \nvarious\
\ modules. This file can also be used \nto selectively remove some modules from\
\ the \noutput all together. The format needs to \nmirror the default limits.txt\
\ file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--subsample"
alternatives:
- "-s"
description: "[Falco only] makes falco faster (but \npossibly less accurate) by\
\ only processing \nreads that are a multiple of this value (using \n0-based\
\ indexing to number reads).\n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bisulfite"
alternatives:
- "-b"
description: "[Falco only] reads are whole genome \nbisulfite sequencing, and\
\ more Ts and fewer \nCs are therefore expected and will be \naccounted for\
\ in base content.\n"
info: null
direction: "input"
# The primary option name is misspelled ("complliment"). It is kept unchanged
# so existing callers keep working; the correctly spelled form is offered as
# an additional alternative next to -r.
- type: "boolean_true"
  name: "--reverse_complliment"
  alternatives:
    - "-r"
    - "--reverse_complement"
  description: "[Falco only] The input is a \nreverse-complement. All modules will\
    \ be \ntested by swapping A/T and C/G\n"
  info: null
  direction: "input"
# Output location, input-format override and optional per-file output names.
# Help texts refer to this component's --outdir option (there is no --output
# option in this config).
# NOTE(review): the default file names below (fastqc_data.txt,
# fastqc_report.html, summary.txt) are taken from falco's documented output
# files - confirm against the pinned falco version.
- name: "Output arguments"
  arguments:
    - type: "file"
      name: "--outdir"
      alternatives:
        - "-o"
      description: "Create all output files in the specified \noutput directory. FALCO-SPECIFIC:\
        \ If the \ndirectory does not exist, the program will \ncreate it.\n"
      info: null
      example:
        - "output"
      must_exist: true
      create_parent: true
      required: true
      direction: "output"
      multiple: false
      multiple_sep: ";"
    - type: "string"
      name: "--format"
      alternatives:
        - "-f"
      description: "Bypasses the normal sequence file format \ndetection and forces\
        \ the program to use the \nspecified format. Valid formats are bam, sam, \nbam_mapped,\
        \ sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n"
      info: null
      required: false
      choices:
        - "bam"
        - "sam"
        - "bam_mapped"
        - "sam_mapped"
        - "fastq"
        - "fq"
        - "fastq.gz"
        - "fq.gz"
      direction: "input"
      multiple: false
      multiple_sep: ";"
    - type: "file"
      name: "--data_filename"
      alternatives:
        - "-D"
      description: "[Falco only] Specify filename for FastQC \ndata output (TXT). If\
        \ not specified, it will \nbe called fastqc_data.txt in either the input \nfile's\
        \ directory or the one specified with \n--outdir. Only available when\
        \ running \nfalco with a single input.\n"
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "output"
      multiple: false
      multiple_sep: ";"
    - type: "file"
      name: "--report_filename"
      alternatives:
        - "-R"
      description: "[Falco only] Specify filename for FastQC \nreport output (HTML).\
        \ If not specified, it \nwill be called fastqc_report.html in either \nthe input\
        \ file's directory or the one \nspecified with --outdir. Only \navailable\
        \ when running falco with a single \ninput.\n"
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "output"
      multiple: false
      multiple_sep: ";"
    - type: "file"
      name: "--summary_filename"
      alternatives:
        - "-S"
      description: "[Falco only] Specify filename for the short \nsummary output (TXT).\
        \ If not specified, it \nwill be called summary.txt in either \nthe input\
        \ file's directory or the one \nspecified with --outdir. Only \navailable\
        \ when running falco with a single \ninput.\n"
      info: null
      must_exist: true
      create_parent: true
      required: false
      direction: "output"
      multiple: false
      multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "A C++ drop-in replacement of FastQC to assess the quality of sequence\
\ read data"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "qc"
- "fastqc"
- "sequencing"
license: "GPL-3.0"
references:
doi:
- "10.12688/f1000research.21142.2"
links:
repository: "https://github.com/smithlabcode/falco"
documentation: "https://falco.readthedocs.io/en/latest/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:trixie-slim"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "wget"
- "build-essential"
- "g++"
- "zlib1g-dev"
- "procps"
interactive: false
- type: "docker"
run:
- "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz\
\ -O /tmp/falco.tar.gz && \\\ncd /tmp && \\\ntar xvf falco.tar.gz && \\\ncd\
\ falco-1.2.2 && \\\n./configure && \\\nmake all && \\\nmake install\n"
- type: "docker"
run:
- "echo \"falco: \\\"$(falco -v | sed -n 's/^falco //p')\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/falco/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/falco"
executable: "target/executable/falco/falco"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1537
target/executable/falco/falco Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3429
target/executable/fastp/fastp Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,366 @@
name: "fastqc"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "FASTQ file(s) to be analyzed.\n"
info: null
example:
- "input.fq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
description: "At least one of the output options (--html, --zip, --summary, --data)\
\ must be used.\n"
arguments:
- type: "file"
name: "--html"
description: "Create the HTML report of the results. \n'*' wild card must be provided\
\ in the output file name. \nWild card will be replaced by the input file basename.\n\
e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\
\ html file named sample_1.html\n"
info: null
example:
- "*.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--zip"
description: "Create the zip file(s) containing: html report, data, images, icons,\
\ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
\ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\
\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
info: null
example:
- "*.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Create the summary file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\
\ an output summary.txt file named sample_1_summary.txt\n"
info: null
example:
- "*_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--data"
description: "Create the data file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an\
\ output data.txt file named sample_1_data.txt\n"
info: null
example:
- "*_data.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--casava"
description: "Files come from raw casava output. Files in the same sample\ngroup\
\ (differing only by the group number) will be analysed\nas a set rather than\
\ individually. Sequences with the filter\nflag set in the header will be excluded\
\ from the analysis.\nFiles must have the same names given to them by casava\n\
(including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
\ together correctly.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nano"
description: "Files come from nanopore sequences and are in fast5 format. In\n\
this mode you can pass in directories to process and the program\nwill take\
\ in all fast5 files within those directories and produce\na single output file\
\ from the sequences found in all files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nofilter"
description: "If running with --casava then don't remove reads flagged by\ncasava\
\ as poor quality when performing the QC analysis.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in the read. \nWARNING: Using this option will cause fastqc\
\ to crash \nand burn if you use it on really long reads, and your \nplots may\
\ end up a ridiculous size. You have been warned!\n"
info: null
direction: "input"
- type: "integer"
name: "--min_length"
description: "Sets an artificial lower limit on the length of the \nsequence to\
\ be shown in the report. As long as you \nset this to a value greater or equal\
\ to your longest \nread length then this will be the sequence length used \n\
to create your read groups. This can be useful for making\ndirectly comparable\
\ statistics from datasets with somewhat \nvariable read lengths.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format detection and \nforces\
\ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
\ sam_mapped, and fastq.\n"
info: null
example:
- "bam"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--contaminants"
alternatives:
- "-c"
description: "Specifies a non-default file which contains the list \nof contaminants\
\ to screen overrepresented sequences against. \nThe file must contain sets\
\ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
\ a hash will be ignored.\n"
info: null
example:
- "contaminants.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--adapters"
alternatives:
- "-a"
description: "Specifies a non-default file which contains the list of \nadapter\
\ sequences which will be explicitly searched against \nthe library. The file\
\ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
\ prefixed with a hash will be ignored.\n"
info: null
example:
- "adapters.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--limits"
alternatives:
- "-l"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to determine \nthe warn/error limits for the various modules.\
\ \nThis file can also be used to selectively remove \nsome modules from the\
\ output altogether. The format \nneeds to mirror the default limits.txt file\
\ found in \nthe Configuration folder.\n"
info: null
example:
- "limits.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--kmers"
alternatives:
- "-k"
description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
\ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
\ specified.\n"
info: null
example:
- 7
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress all progress messages on stdout and only report errors.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "FastQC - A high throughput sequence QC analysis tool."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Quality control"
- "BAM"
- "SAM"
- "FASTQ"
license: "GPL-3.0, Apache-2.0"
links:
repository: "https://github.com/s-andrews/FastQC"
homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
issue_tracker: "https://github.com/s-andrews/FastQC/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "biocontainers/fastqc:v0.11.9_cv8"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/fastqc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/fastqc"
executable: "target/executable/fastqc/fastqc"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1680
target/executable/fastqc/fastqc Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,671 @@
name: "featurecounts"
version: "v0.2"
authors:
- name: "Sai Nirmayi Yasa"
roles:
- "author"
- "maintainer"
info:
links:
email: "nirmayi@data-intuitive.com"
github: "sainirmayi"
linkedin: "sai-nirmayi-yasa"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Junior Bioinformatics Researcher"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--annotation"
alternatives:
- "-a"
description: "Name of an annotation file. GTF/GFF format by default. See '--format'\
\ option for more format information.\n"
info: null
example:
- "annotation.gtf"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "A list of SAM or BAM format files separated by semi-colon (;). They\
\ can be either name or location sorted. Location-sorted paired-end reads are\
\ automatically sorted by read names.\n"
info: null
example:
- "input_file1.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--counts"
alternatives:
- "-o"
description: "Name of output file including read counts in tab delimited format.\n"
info: null
example:
- "features.tsv"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Summary statistics of counting results in tab delimited format.\n"
info: null
example:
- "summary.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--junctions"
description: "Count number of reads supporting each exon-exon junction. Junctions\
\ were identified from those exon-spanning reads in the input (containing 'N'\
\ in CIGAR string).\n"
info: null
example:
- "junctions.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Annotation"
arguments:
- type: "string"
name: "--format"
alternatives:
- "-F"
description: "Specify format of the provided annotation file. Acceptable formats\
\ include 'GTF' (or compatible GFF format) and 'SAF'. 'GTF' by default. \n"
info: null
example:
- "GTF"
required: false
choices:
- "GTF"
- "GFF"
- "SAF"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--feature_type"
alternatives:
- "-t"
description: "Specify feature type(s) in a GTF annotation. If multiple types are\
\ provided, they should be separated by ';' with no space in between. 'exon'\
\ by default. Rows in the annotation with a matched feature will be extracted\
\ and used for read mapping.\n"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--attribute_type"
alternatives:
- "-g"
description: "Specify attribute type in GTF annotation. 'gene_id' by default.\
\ Meta-features used for read counting will be extracted from annotation using\
\ the provided value.\n"
info: null
example:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_attributes"
description: "Extract extra attribute types from the provided GTF annotation and\
\ include them in the counting output. These attribute types will not be used\
\ to group features. If more than one attribute type is provided they should\
\ be separated by semicolon (;).\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--chrom_alias"
alternatives:
- "-A"
description: "Provide a chromosome name alias file to match chr names in annotation\
\ with those in the reads. This should be a two-column comma-delimited text\
\ file. Its first column should include chr names in the annotation and its\
\ second column should include chr names in the reads. Chr names are case sensitive.\
\ No column header should be included in the file.\n"
info: null
example:
- "chrom_alias.csv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Level of summarization"
arguments:
- type: "boolean_true"
name: "--feature_level"
alternatives:
- "-f"
description: "Perform read counting at feature level (eg. counting reads for exons\
\ rather than genes).\n"
info: null
direction: "input"
- name: "Overlap between reads and features"
arguments:
- type: "boolean_true"
name: "--overlapping"
alternatives:
- "-O"
description: "Assign reads to all their overlapping meta-features (or features\
\ if '--feature_level' is specified).\n"
info: null
direction: "input"
- type: "integer"
name: "--min_overlap"
description: "Minimum number of overlapping bases in a read that is required for\
\ read assignment. 1 by default. Number of overlapping bases is counted from\
\ both reads if paired end. If a negative value is provided, then a gap of up\
\ to specified size will be allowed between read and the feature that the read\
\ is assigned to.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--frac_overlap"
description: "Minimum fraction of overlapping bases in a read that is required\
\ for read assignment. Value should be within range [0,1]. 0 by default. Number\
\ of overlapping bases is counted from both reads if paired end. Both this option\
\ and '--min_overlap' option need to be satisfied for read assignment.\n"
info: null
example:
- 0.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--frac_overlap_feature"
description: "Minimum fraction of overlapping bases in a feature that is required\
\ for read assignment. Value should be within range [0,1]. 0 by default.\n"
info: null
example:
- 0.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--largest_overlap"
description: "Assign reads to a meta-feature/feature that has the largest number\
\ of overlapping bases.\n"
info: null
direction: "input"
- type: "integer"
name: "--non_overlap"
description: "Maximum number of non-overlapping bases in a read (or a read pair)\
\ that is allowed when being assigned to a feature. No limit is set by default.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--non_overlap_feature"
description: "Maximum number of non-overlapping bases in a feature that is allowed\
\ in read assignment. No limit is set by default.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_extension5"
description: "Reads are extended upstream by <int> bases from their 5' end.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_extension3"
description: "Reads are extended downstream by <int> bases from their 3' end.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read2pos"
description: "Reduce reads to their 5' most base or 3' most base. Read counting\
\ is then performed based on the single base the read is reduced to.\n"
info: null
required: false
choices:
- 3
- 5
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Multi-mapping reads"
arguments:
- type: "boolean_true"
name: "--multi_mapping"
alternatives:
- "-M"
description: "Multi-mapping reads will also be counted. For a multi-mapping read,\
\ all its reported alignments will be counted. The 'NH' tag in BAM/SAM input\
\ is used to detect multi-mapping reads.\n"
info: null
direction: "input"
- name: "Fractional counting"
arguments:
- type: "boolean_true"
name: "--fraction"
description: "Assign fractional counts to features. This option must be used together\
\ with '--multi_mapping' or '--overlapping' or both. When '--multi_mapping'\
\ is specified, each reported alignment from a multi-mapping read (identified\
\ via 'NH' tag) will carry a fractional count of 1/x, instead of 1 (one), where\
\ x is the total number of alignments reported for the same read. When '--overlapping'\
\ is specified, each overlapping feature will receive a fractional count of\
\ 1/y, where y is the total number of features overlapping with the read. When\
\ both '--multi_mapping' and '--overlapping' are specified, each alignment will\
\ carry a fractional count of 1/(x*y).\n"
info: null
direction: "input"
- name: "Read filtering"
arguments:
- type: "integer"
name: "--min_map_quality"
alternatives:
- "-Q"
description: "The minimum mapping quality score a read must satisfy in order to\
\ be counted. For paired-end reads, at least one end should satisfy this criteria.\
\ 0 by default.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--split_only"
description: "Count split alignments only (ie. alignments with CIGAR string containing\
\ 'N'). An example of split alignments is exon-spanning reads in RNA-seq data.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--non_split_only"
description: "If specified, only non-split alignments (CIGAR strings do not contain\
\ letter 'N') will be counted. All the other alignments will be ignored.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--primary"
description: "Count primary alignments only. Primary alignments are identified\
\ using bit 0x100 in SAM/BAM FLAG field.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_dup"
description: "Ignore duplicate reads in read counting. Duplicate reads are identified\
\ using bit 0x400 in BAM/SAM FLAG field. The whole read pair is ignored if one\
\ of the reads is a duplicate read for paired end data.\n"
info: null
direction: "input"
- name: "Strandedness"
arguments:
- type: "integer"
name: "--strand"
alternatives:
- "-s"
description: "Perform strand-specific read counting. A single integer value (applied\
\ to all input files) should be provided. Possible values include: 0 (unstranded),\
\ 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded\
\ read counting carried out for all input files).\n"
info: null
example:
- 0
required: false
choices:
- 0
- 1
- 2
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Exon-exon junctions"
arguments:
- type: "file"
name: "--ref_fasta"
alternatives:
- "-G"
description: "Provide the name of a FASTA-format file that contains the reference\
\ sequences used in read mapping that produced the provided SAM/BAM files.\n"
info: null
example:
- "reference.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Parameters specific to paired end reads"
arguments:
- type: "boolean_true"
name: "--paired"
alternatives:
- "-p"
description: "Specify that input data contain paired-end reads. To perform fragment\
\ counting (ie. counting read pairs), the '--count_read_pairs' parameter should\
\ also be specified in addition to this parameter.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--count_read_pairs"
description: "Count read pairs (fragments) instead of reads. This option is only\
\ applicable for paired-end reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--both_aligned"
alternatives:
- "-B"
description: "Only count read pairs that have both ends aligned.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--check_pe_dist"
alternatives:
- "-P"
description: "Check validity of paired-end distance when counting read pairs.\
\ Use '--min_length' and '--max_length' to set thresholds.\n"
info: null
direction: "input"
- type: "integer"
name: "--min_length"
alternatives:
- "-d"
description: "Minimum fragment/template length, 50 by default.\n"
info: null
example:
- 50
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_length"
alternatives:
- "-D"
description: "Maximum fragment/template length, 600 by default.\n"
info: null
example:
- 600
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--same_strand"
alternatives:
- "-C"
description: "Do not count read pairs that have their two ends mapping to different\
\ chromosomes or mapping to same chromosome but on different strands.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--donotsort"
description: "Do not sort reads in BAM/SAM input. Note that reads from the same\
\ pair are required to be located next to each other in the input.\n"
info: null
direction: "input"
- name: "Read groups"
arguments:
- type: "boolean_true"
name: "--by_read_group"
description: "Assign reads by read group. \"RG\" tag is required to be present\
\ in the input BAM/SAM files.\n"
info: null
direction: "input"
- name: "Long reads"
arguments:
- type: "boolean_true"
name: "--long_reads"
description: "Count long reads such as Nanopore and PacBio reads. Long read counting\
\ can only run in one thread and only reads (not read-pairs) can be counted.\
\ There is no limitation on the number of 'M' operations allowed in a CIGAR\
\ string in long read counting.\n"
info: null
direction: "input"
- name: "Assignment results for each read"
arguments:
- type: "file"
name: "--detailed_results"
description: "Directory to save the detailed assignment results. Use `--detailed_results_format`\
\ to determine the format of the detailed results.\n"
info: null
example:
- "detailed_results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--detailed_results_format"
alternatives:
- "-R"
description: "Output detailed assignment results for each read or read-pair. Results\
\ are saved to a file that is in one of the following formats: CORE, SAM and\
\ BAM. See documentation for more info about these formats.\n"
info: null
required: false
choices:
- "CORE"
- "SAM"
- "BAM"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Miscellaneous"
arguments:
- type: "integer"
name: "--max_M_op"
description: "Maximum number of 'M' operations allowed in a CIGAR string. 10 by\
\ default. Both 'X' and '=' are treated as 'M' and adjacent 'M' operations are\
\ merged in the CIGAR string.\n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
description: "Output verbose information for debugging, such as un-matched chromosome/contig\
\ names.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "featureCounts is a read summarization program for counting reads generated\
\ from either RNA or genomic DNA sequencing experiments by implementing highly efficient\
\ chromosome hashing and feature blocking techniques. It works with either single\
\ or paired-end reads and provides a wide range of options appropriate for different\
\ sequencing applications.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Read counting"
- "Genomic features"
license: "GPL-3.0"
references:
doi:
- "10.1093/bioinformatics/btt656"
links:
repository: "https://github.com/ShiLab-Bioinformatics/subread"
homepage: "https://subread.sourceforge.net/"
documentation: "https://subread.sourceforge.net/SubreadUsersGuide.pdf"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/subread:2.0.6--he4a0461_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "featureCounts -v 2>&1 | sed 's/featureCounts v\\([0-9.]*\\)/featureCounts:\
\ \\1/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/featurecounts/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/featurecounts"
executable: "target/executable/featurecounts/featurecounts"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,711 @@
name: "gffread"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "A reference file in either the GFF3, GFF2 or GTF format.\n"
info: null
example:
- "annotation.gff"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chr_mapping"
alternatives:
- "-m"
description: "<chr_replace> is a name mapping table for converting reference sequence\
\ names, \nhaving this 2-column format: <original_ref_ID> <new_ref_ID>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--seq_info"
alternatives:
- "-s"
description: "<seq_info.fsize> is a tab-delimited file providing this info for\
\ each of the mapped \nsequences: <seq-name> <seq-length> <seq-description>\
\ (useful for --description option with \nmRNA/EST/protein mappings).\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "Full path to a multi-fasta file with the genomic sequences for all\
\ input mappings, \nOR a directory with single-fasta files (one per genomic\
\ sequence, with file names \nmatching sequence names).\n"
info: null
example:
- "genome.fa"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--outfile"
alternatives:
- "-o"
description: "Write the output records into <outfile>.\n"
info: null
example:
- "output.gff"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--force_exons"
description: "Make sure that the lowest level GFF features are considered \"exon\"\
\ features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--gene2exon"
description: "For single-line genes not parenting any transcripts, add an exon\
\ feature spanning \nthe entire gene (treat it as a transcript).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--t_adopt"
description: "Try to find a parent gene overlapping/containing a transcript that\
\ does not have \nany explicit gene Parent.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--decode"
alternatives:
- "-D"
description: "Decode url encoded characters within attributes.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--merge_exons"
alternatives:
- "-Z"
description: "Merge very close exons into a single exon (when intron size<4).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--junctions"
alternatives:
- "-j"
description: "Output the junctions and the corresponding transcripts.\n"
info: null
direction: "input"
- type: "file"
name: "--spliced_exons"
alternatives:
- "-w"
description: "Write a fasta file with spliced exons for each transcript.\n"
info: null
example:
- "exons.fa"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--w_add"
description: "For the --spliced_exons option, extract additional <N> bases both\
\ upstream and \ndownstream of the transcript boundaries.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--w_nocds"
description: "For --spliced_exons, disable the output of CDS info in the FASTA\
\ file.\n"
info: null
direction: "input"
# FASTA file with the spliced CDS of each transcript. gffread writes this
# file, so it is declared as an output (like --spliced_exons), not an input.
- type: "file"
  name: "--spliced_cds"
  alternatives:
  - "-x"
  description: "Write a fasta file with spliced CDS for each GFF transcript.\n"
  info: null
  example:
  - "cds.fa"
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
# Protein FASTA file with the translated CDS of each record. gffread writes
# this file, so it is declared as an output (like --spliced_exons), not an
# input.
- type: "file"
  name: "--tr_cds"
  alternatives:
  - "-y"
  description: "Write a protein fasta file with the translation of CDS for each\
    \ record.\n"
  info: null
  example:
  - "tr_cds.fa"
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
# Flag that modifies the FASTA deflines of the three FASTA-writing options.
# The help text names those options exactly as they are declared in this
# config (--tr_cds, with two dashes).
- type: "boolean_true"
  name: "--w_coords"
  alternatives:
  - "-W"
  description: "For --spliced_exons, --spliced_cds and --tr_cds options, write in\
    \ the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--stop_dot"
alternatives:
- "-S"
description: "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--id_version"
alternatives:
- "-L"
description: "Ensembl GTF to GFF3 conversion, adds version to IDs.\n"
info: null
direction: "input"
- type: "string"
name: "--trackname"
alternatives:
- "-t"
description: "Use <trackname> in the 2nd column of each GFF/GTF output line.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--gtf_output"
alternatives:
- "-T"
description: "Main output will be GTF instead of GFF3.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed"
description: "Output records in BED format instead of default GFF3.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--tlf"
description: "Output \"transcript line format\" which is like GFF but with exons\
\ and CDS related \nfeatures stored as GFF attributes in the transcript feature\
\ line, like this:\n exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>\n\
<exons> is a comma-delimited list of exon_start-exon_end coordinates;\n<CDScoords>\
\ is CDS_start:CDS_end coordinates or a list like <exons>.\n"
info: null
direction: "input"
- type: "string"
name: "--table"
description: "Output a simple tab delimited format instead of GFF, with columns\
\ having the values \nof GFF attributes given in <attrlist>; special pseudo-attributes\
\ (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand,\
\ @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds\
\ FASTA output files are enabled, the \nsame fields (excluding @id) are appended\
\ to the definition line of corresponding FASTA\nrecords.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--expose_dups"
alternatives:
- "-E"
- "-v"
description: "Expose (warn about) duplicate transcript IDs and other potential\
\ problems with the \ngiven GFF/GTF records.\n"
info: null
direction: "input"
- name: "Options"
arguments:
- type: "file"
name: "--ids"
description: "Discard records/transcripts if their IDs are not listed in <IDs.lst>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nids"
description: "Discard records/transcripts if their IDs are listed in <IDs.lst>.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--maxintron"
alternatives:
- "-i"
description: "Discard transcripts having an intron larger than <maxintron>.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--minlen"
alternatives:
- "-l"
description: "Discard transcripts shorter than <minlen> bases.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--range"
alternatives:
- "-r"
description: "Only show transcripts overlapping coordinate range <start>..<end>\
\ (on chromosome/contig \n<chr>, strand <strand> if provided).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--strict_range"
alternatives:
- "-R"
description: "For --range option, discard all transcripts that are not fully contained\
\ within the given \nrange.\n"
info: null
direction: "input"
- type: "string"
name: "--jmatch"
description: "Only output transcripts matching the given junction.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_single_exon"
alternatives:
- "-U"
description: "Discard single-exon transcripts.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--coding"
alternatives:
- "-C"
description: "Coding only: discard mRNAs that have no CDS features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nc"
description: "Non-coding only: discard mRNAs that have CDS features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_locus"
description: "Discard locus features and attributes found in the input.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--description"
alternatives:
- "-A"
description: "Use the description field from <seq_info.fsize> and add it as the\
\ value for a 'descr' \nattribute to the GFF record.\n"
info: null
direction: "input"
- name: "Sorting"
arguments:
- type: "boolean_true"
name: "--sort_alpha"
description: "Chromosomes (reference sequences) are sorted alphabetically.\n"
info: null
direction: "input"
- type: "file"
name: "--sort_by"
description: "Sort the reference sequences by the order in which their names are\
\ given in the \n<refseq.lst> file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Misc options"
arguments:
- type: "boolean_true"
name: "--keep_attrs"
alternatives:
- "-F"
description: "Keep all GFF attributes (for non-exon features).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_exon_attrs"
description: "For -F option, do not attempt to reduce redundant exon/CDS attributes.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_exon_attrs"
alternatives:
- "-G"
description: "Do not keep exon attributes, move them to the transcript feature\
\ (for GFF3 output).\n"
info: null
direction: "input"
# Attribute whitelist for the output records. The value is a single
# comma-delimited string; the help text ends with "to retain" so the sentence
# is complete.
- type: "string"
  name: "--attrs"
  description: "Only output the GTF/GFF attributes listed in <attr-list> which is\
    \ a comma delimited \nlist of attribute names to retain.\n"
  info: null
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--keep_genes"
description: "In transcript-only mode (default), also preserve gene records.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_comments"
description: "For GFF3 input/output, try to preserve comments.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--process_other"
alternatives:
- "-O"
description: "process other non-transcript GFF records (by default non-transcript\
\ records are ignored).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--rm_stop_codons"
alternatives:
- "-V"
description: "Discard any mRNAs with CDS having in-frame stop codons (requires\
\ --genome).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--adj_cds_start"
alternatives:
- "-H"
description: "For --rm_stop_codons option, check and adjust the starting CDS phase\
\ if the original phase\nleads to a translation with an in-frame stop codon.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--opposite_strand"
alternatives:
- "-B"
description: "For -V option, single-exon transcripts are also checked on the opposite\
\ strand (requires \n--genome). \n"
info: null
direction: "input"
- type: "boolean_true"
name: "--coding_status"
alternatives:
- "-P"
description: "Add transcript level GFF attributes about the coding status of each\
\ transcript, including \npartialness or in-frame stop codons (requires --genome).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--add_hasCDS"
description: "Add a \"hasCDS\" attribute with value \"true\" for transcripts that\
\ have CDS features. \n"
info: null
direction: "input"
- type: "boolean_true"
name: "--adj_stop"
description: "Stop codon adjustment: enables --coding_status and performs automatic\
\ adjustment of the CDS stop \ncoordinate if premature or downstream.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--rm_noncanon"
alternatives:
- "-N"
description: "Discard multi-exon mRNAs that have any intron with a non-canonical\
\ splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--complete_cds"
alternatives:
- "-J"
description: "Discard any mRNAs that either lack initial START codon or the terminal\
\ STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a\
\ complete CDS).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_pseudo"
description: "Filter out records matching the 'pseudo' keyword.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--in_bed"
description: "Input should be parsed as BED format (automatic if the input filename\
\ ends with .bed*).\n"
info: null
direction: "input"
# Flag forcing the input to be parsed as "transcript line format". The help
# text points at the --tlf output option defined elsewhere in this config and
# keeps its parentheses balanced.
- type: "boolean_true"
  name: "--in_tlf"
  description: "Input GFF-like one-line-per-transcript format without exon/CDS features\
    \ (see --tlf option); \nautomatic if the input filename ends with .tlf.\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--stream"
description: "Fast processing of input GFF/BED transcripts as they are received\
\ (no sorting, exons must \nbe grouped by transcript in the input data).\n"
info: null
direction: "input"
- name: "Clustering"
arguments:
- type: "boolean_true"
name: "--merge"
alternatives:
- "-M"
description: "Cluster the input transcripts into loci, discarding \"redundant\"\
\ transcripts (those with \nthe same exact introns and fully contained or equal\
\ boundaries).\n"
info: null
direction: "input"
# File that receives duplication info when --merge is used. It is written by
# the tool, so it is declared as an optional output; as an input with
# must_exist: true the existence check would reject a not-yet-created path.
# must_exist is false, mirroring the other optional output --spliced_exons.
- type: "file"
  name: "--dupinfo"
  alternatives:
  - "-d"
  description: "For --merge option, write duplication info to file <dupinfo>.\n"
  info: null
  must_exist: false
  create_parent: true
  required: false
  direction: "output"
  multiple: false
  multiple_sep: ";"
- type: "boolean_true"
name: "--cluster_only"
description: "Same as --merge but without discarding any of the \"duplicate\"\
\ transcripts, only create \n\"locus\" features.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--rm_redundant"
alternatives:
- "-K"
description: "For --merge option: also discard as redundant the shorter, fully\
\ contained transcripts (intron \nchains matching a part of the container).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_boundary"
alternatives:
- "-Q"
description: "For --merge option, no longer require boundary containment when\
\ assessing redundancy (can be \ncombined with --rm_redundant); only introns\
\ have to match for multi-exon transcripts, and >=80%\noverlap for single-exon\
\ transcripts.\n"
info: null
direction: "input"
# Clustering flag used together with --merge. The help text refers to the
# sibling flag by its declared name, --rm_redundant.
- type: "boolean_true"
  name: "--no_overlap"
  alternatives:
  - "-Y"
  description: "For --merge option, enforce --no_boundary but also discard overlapping\
    \ single-exon transcripts,\neven on the opposite strand (can be combined with\
    \ --rm_redundant).\n"
  info: null
  direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Validate, filter, convert and perform various other operations on GFF\
\ files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "gff"
- "conversion"
- "validation"
- "filtering"
license: "MIT"
references:
doi:
- "10.12688/f1000research.23297.2"
links:
repository: "https://github.com/gpertea/gffread"
homepage: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread"
documentation: "https://ccb.jhu.edu/software/stringtie/gff.shtml#gffread"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines declared for the gffread component: a docker engine and a native one.
engines:
# Docker engine, based on the biocontainers gffread image named below.
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/gffread:0.12.7--hdcf5f25_3"
# target_registry and target_tag carry the same values that the config_mods
# entries in package_config assign to docker engines.
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
# Writes a line of the form `gffread: "<version output>"` to
# /var/software_versions.txt, capturing both stdout and stderr of
# `gffread --version`.
- "echo \"gffread: \\\"$(gffread --version 2>&1)\\\"\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
# Native engine entry; it carries no settings beyond its type and id.
# NOTE(review): presumably appended by the `.engines += { type: "native" }`
# config_mod listed in package_config - confirm.
- type: "native"
id: "native"
build_info:
config: "src/gffread/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/gffread"
executable: "target/executable/gffread/gffread"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

2745
target/executable/gffread/gffread Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,533 @@
name: "lofreq_call"
namespace: "lofreq"
version: "v0.2"
authors:
- name: "Kai Waldrant"
roles:
- "author"
- "maintainer"
info:
links:
email: "kai@data-intuitive.com"
github: "KaiWaldrant"
orcid: "0009-0003-8555-1361"
linkedin: "kaiwaldrant"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Contributor"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Input BAM file.\n"
info: null
example:
- "normal.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_bai"
description: "Index file for the input BAM file.\n"
info: null
example:
- "normal.bai"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--ref"
alternatives:
- "-f"
description: "Indexed reference fasta file (gzip supported). Default: none.\n"
info: null
example:
- "reference.fasta"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
# Destination VCF file. The argument is required in this component, so the
# help text no longer advertises a stdout default.
- type: "file"
  name: "--out"
  alternatives:
  - "-o"
  description: "Vcf output file.\n"
  info: null
  example:
  - "output.vcf"
  must_exist: true
  create_parent: true
  required: true
  direction: "output"
  multiple: false
  multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--region"
alternatives:
- "-r"
description: "Limit calls to this region (chrom:start-end). Default: none.\n"
info: null
example:
- "chr1:1000-2000"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bed"
alternatives:
- "-l"
description: "List of positions (chr pos) or regions (BED). Default: none.\n"
info: null
example:
- "regions.bed"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_bq"
alternatives:
- "-q"
description: "Skip any base with baseQ smaller than INT. Default: 6.\n"
info: null
example:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_alt_bq"
alternatives:
- "-Q"
description: "Skip alternate bases with baseQ smaller than INT. Default: 6.\n"
info: null
example:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--def_alt_bq"
alternatives:
- "-R"
description: "Overwrite baseQs of alternate bases (that passed bq filter) with\
\ this value (-1: use median ref-bq; 0: keep). Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_jq"
alternatives:
- "-j"
description: "Skip any base with joinedQ smaller than INT. Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_alt_jq"
alternatives:
- "-J"
description: "Skip alternate bases with joinedQ smaller than INT. Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--def_alt_jq"
alternatives:
- "-K"
description: "Overwrite joinedQs of alternate bases (that passed jq filter) with\
\ this value (-1: use median ref-bq; 0: keep). Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_baq"
alternatives:
- "-B"
description: "Disable use of base-alignment quality (BAQ).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_idaq"
alternatives:
- "-A"
description: "Don't use IDAQ values (NOT recommended under ANY circumstances other\
\ than debugging).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--del_baq"
alternatives:
- "-D"
description: "Delete pre-existing BAQ values, i.e. compute even if already present\
\ in BAM.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_ext_baq"
alternatives:
- "-e"
description: "Use 'normal' BAQ (samtools default) instead of extended BAQ (both\
\ computed on the fly if not already present in lb tag).\n"
info: null
direction: "input"
- type: "integer"
name: "--min_mq"
alternatives:
- "-m"
description: "Skip reads with mapping quality smaller than INT. Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_mq"
alternatives:
- "-M"
description: "Cap mapping quality at INT. Default: 255.\n"
info: null
example:
- 255
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_mq"
alternatives:
- "-N"
description: "Don't merge mapping quality in LoFreq's model.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--call_indels"
description: "Enable indel calls (note: preprocess your file to include indel\
\ alignment qualities!).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--only_indels"
description: "Only call indels; no SNVs.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--src_qual"
alternatives:
- "-s"
description: "Enable computation of source quality.\n"
info: null
direction: "input"
- type: "file"
name: "--ign_vcf"
alternatives:
- "-S"
description: "Ignore variants in this vcf file for source quality computation.\
\ Multiple files can be given separated by commas.\n"
info: null
example:
- "variants.vcf"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--def_nm_q"
alternatives:
- "-T"
description: "If >= 0, then replace non-match base qualities with this default\
\ value. Default: -1.\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--sig"
alternatives:
- "-a"
description: "P-Value cutoff / significance level. Default: 0.010000.\n"
info: null
example:
- 0.01
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--bonf"
alternatives:
- "-b"
description: "Bonferroni factor. 'dynamic' (increase per actually performed test)\
\ or INT. Default: Dynamic.\n"
info: null
example:
- "dynamic"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Minimum coverage a position needs in order to be tested. The note in the
# help text refers to this component's own flag name, --no_default_filter
# (declared further down in the same argument group).
- type: "integer"
  name: "--min_cov"
  alternatives:
  - "-C"
  description: "Test only positions having at least this coverage. Default: 1.\n\
    (note: without --no_default_filter default filters (incl. coverage) kick in\
    \ after predictions are done).\n"
  info: null
  example:
  - 1
  required: false
  direction: "input"
  multiple: false
  multiple_sep: ";"
- type: "integer"
name: "--max_depth"
alternatives:
- "-d"
description: "Cap coverage at this depth. Default: 1000000.\n"
info: null
example:
- 1000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--illumina_13"
description: "Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--use_orphan"
description: "Count anomalous read pairs (i.e. where mate is not aligned properly).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--plp_summary_only"
description: "No variant calling. Just output pileup summary per column.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_default_filter"
description: "Don't run default 'lofreq filter' automatically after calling variants.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--force_overwrite"
description: "Overwrite any existing output.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--verbose"
description: "Be verbose.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--debug"
description: "Enable debugging.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Call variants from a BAM file.\n\nLoFreq* (i.e. LoFreq version 2) is\
\ a fast and sensitive variant-caller for inferring SNVs and indels from next-generation\
\ sequencing data. It makes full use of base-call qualities and other sources of\
\ errors inherent in sequencing (e.g. mapping or base/indel alignment uncertainty),\
\ which are usually ignored by other methods or only used for filtering.\n\nLoFreq*\
\ can run on almost any type of aligned sequencing data (e.g. Illumina, IonTorrent\
\ or Pacbio) since no machine- or sequencing-technology dependent thresholds are\
\ used. It automatically adapts to changes in coverage and sequencing quality and\
\ can therefore be applied to a variety of data-sets e.g. viral/quasispecies, bacterial,\
\ metagenomics or somatic data.\n\nLoFreq* is very sensitive; most notably, it is\
\ able to predict variants below the average base-call quality (i.e. sequencing\
\ error rate). Each variant call is assigned a p-value which allows for rigorous\
\ false positive control. Even though it uses no approximations or heuristics, it\
\ is very efficient due to several runtime optimizations and also provides a (pseudo-)parallel\
\ implementation. LoFreq* is generic and fast enough to be applied to high-coverage\
\ data and large genomes. On a single processor it takes a minute to analyze Dengue\
\ genome sequencing data with nearly 4000X coverage, roughly one hour to call SNVs\
\ on a 600X coverage E.coli genome and also roughly an hour to run on a 100X coverage\
\ human exome dataset.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
# Keywords describing the lofreq_call component ("frequency" spelled
# correctly so keyword searches match).
keywords:
- "variant calling"
- "low frequency variant calling"
- "lofreq"
- "lofreq/call"
license: "MIT"
references:
doi:
- "10.1093/nar/gks918"
links:
repository: "https://github.com/viash-hub/biobox"
homepage: "https://csb5.github.io/lofreq/"
documentation: "https://csb5.github.io/lofreq/commands/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines declared for the lofreq_call component: a docker engine and a
# native one.
engines:
# Docker engine, based on the biocontainers lofreq image named below.
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/lofreq:2.1.5--py38h794fc9e_10"
# target_registry and target_tag carry the same values that the config_mods
# entries in package_config assign to docker engines.
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
# Takes the line of `lofreq version` output containing 'version', strips the
# "version: " prefix, and writes "lofreq: <version>" to
# /var/software_versions.txt.
- "version=$(lofreq version | grep 'version' | sed 's/version: //') && \\\necho\
\ \"lofreq: $version\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
# Native engine entry; it carries no settings beyond its type and id.
# NOTE(review): presumably appended by the `.engines += { type: "native" }`
# config_mod listed in package_config - confirm.
- type: "native"
id: "native"
build_info:
config: "src/lofreq/call/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/lofreq/lofreq_call"
executable: "target/executable/lofreq/lofreq_call/lofreq_call"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,241 @@
name: "lofreq_indelqual"
namespace: "lofreq"
version: "v0.2"
authors:
- name: "Kai Waldrant"
roles:
- "author"
- "maintainer"
info:
links:
email: "kai@data-intuitive.com"
github: "KaiWaldrant"
orcid: "0009-0003-8555-1361"
linkedin: "kaiwaldrant"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Contributor"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Input BAM file.\n"
info: null
example:
- "normal.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--ref"
alternatives:
- "-f"
description: "Reference sequence used for mapping (Only required for --dindel).\n"
info: null
example:
- "reference.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--out"
alternatives:
- "-o"
description: "Output BAM file.\n"
info: null
example:
- "output.bam"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "string"
name: "--uniform"
alternatives:
- "-u"
description: "Add this indel quality uniformly to all bases. Use two comma separated\
\ values to specify insertion and deletion quality separately. (clashes with\
\ --dindel).\n"
info: null
example:
- "50,50"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--dindel"
description: "Add Dindel's indel qualities (Illumina specific) (clashes with -u;\
\ needs --ref).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--verbose"
description: "Be verbose.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Insert indel qualities into BAM file (required for indel predictions).\n\
\nThe preferred way of inserting indel qualities should be via GATK's BQSR (>=2).\
\ If that's not possible, use this subcommand.\nThe command has two modes: 'uniform'\
\ and 'dindel':\n- 'uniform' will assign a given value uniformly, whereas\n- 'dindel'\
\ will insert indel qualities based on Dindel (PMID 20980555).\nBoth will overwrite\
\ any existing values.\nDo not realign your BAM file afterwards!\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "bam"
- "indel"
- "qualities"
- "indelqual"
- "lofreq"
- "lofreq/indelqual"
license: "MIT"
references:
doi:
- "10.1093/nar/gks918"
links:
repository: "https://github.com/viash-hub/biobox"
homepage: "https://csb5.github.io/lofreq/"
documentation: "https://csb5.github.io/lofreq/commands/"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/lofreq:2.1.5--py38h794fc9e_10"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "version=$(lofreq version | grep 'version' | sed 's/version: //') && \\\necho\
\ \"lofreq: $version\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/lofreq/indelqual/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/lofreq/lofreq_indelqual"
executable: "target/executable/lofreq/lofreq_indelqual/lofreq_indelqual"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,482 @@
name: "multiqc"
version: "v0.2"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "File paths to be searched for analysis results to be included in\
\ the report.\n"
info: null
example:
- "data/results"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_report"
description: "Filepath of the generated report.\n"
info: null
example:
- "multiqc_report.html"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_data"
description: "Output directory for parsed data files. If not provided, parsed\
\ data will not be published.\n"
info: null
example:
- "multiqc_data"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_plots"
description: "Output directory for generated plots. If not provided, plots will\
\ not be published.\n"
info: null
example:
- "multiqc_plots"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Modules and analyses to run"
arguments:
- type: "string"
name: "--include_modules"
description: "Use only these modules"
info: null
example:
- "fastqc"
- "cutadapt"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--exclude_modules"
description: "Do not use these modules"
info: null
example:
- "fastqc"
- "cutadapt"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--ignore_analysis"
info: null
example:
- "run_one/*"
- "run_two/*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--ignore_samples"
info: null
example:
- "sample_1*"
- "sample_3*"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--ignore_symlinks"
description: "Ignore symlinked directories and files"
info: null
direction: "input"
- name: "Sample name handling"
arguments:
- type: "boolean_true"
name: "--dirs"
description: "Prepend directory to sample names to avoid clashing filenames"
info: null
direction: "input"
- type: "integer"
name: "--dirs_depth"
description: "Prepend n directories to sample names. Negative number to take from\
\ start of path."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--full_names"
description: "Do not clean the sample names (leave as full file name)"
info: null
direction: "input"
- type: "boolean_true"
name: "--fn_as_s_name"
description: "Use the log filename as the sample name"
info: null
direction: "input"
- type: "file"
name: "--replace_names"
description: "TSV file to rename sample names during report generation"
info: null
example:
- "replace_names.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Report Customisation"
arguments:
- type: "string"
name: "--title"
description: "Report title. Printed as page header, used for filename if not otherwise\
\ specified.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--comment"
description: "Custom comment, will be printed at the top of the report.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--template"
description: "Report template to use.\n"
info: null
required: false
choices:
- "default"
- "gathered"
- "geo"
- "highcharts"
- "sections"
- "simple"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_names"
description: "TSV file containing alternative sample names for renaming buttons\
\ in the report.\n"
info: null
example:
- "sample_names.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_filters"
description: "TSV file containing show/hide patterns for the report\n"
info: null
example:
- "sample_filters.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--custom_css_file"
description: "Custom CSS file to add to the final report\n"
info: null
example:
- "custom_style_sheet.css"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--profile_runtime"
description: "Add analysis of how long MultiQC takes to run to the report\n"
info: null
direction: "input"
- name: "MultiQC behaviour"
arguments:
- type: "boolean_true"
name: "--verbose"
description: "Increase output verbosity.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
description: "Only show log warnings\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--strict"
description: "Don't catch exceptions, run additional code checks to help development.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--development"
description: "Development mode. Do not compress and minimise JS, export uncompressed\
\ plot data.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--require_logs"
description: "Require all explicitly requested modules to have log files. If not,\
\ MultiQC will exit with an error.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_megaqc_upload"
description: "Don't upload generated report to MegaQC, even if MegaQC options\
\ are found.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_ansi"
description: "Disable coloured log output.\n"
info: null
direction: "input"
- type: "string"
name: "--cl_config"
description: "YAML formatted string that allows customizing MultiQC behaviour\
\ like input file detection.\n"
info: null
example:
- "qualimap_config: { general_stats_coverage: [20,40,200] }"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output format"
arguments:
- type: "boolean_true"
name: "--flat"
description: "Use only flat plots (static images).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--interactive"
description: "Use only interactive plots (in-browser Javascript).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--data_dir"
description: "Force the parsed data directory to be created.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_data_dir"
description: "Prevent the parsed data directory from being created.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--zip_data_dir"
description: "Compress the data directory.\n"
info: null
direction: "input"
- type: "string"
name: "--data_format"
description: "Output parsed data in a different format than the default 'txt'.\n"
info: null
required: false
choices:
- "tsv"
- "csv"
- "json"
- "yaml"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--pdf"
description: "Creates PDF report with the 'simple' template. Requires Pandoc to\
\ be installed.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "MultiQC aggregates results from bioinformatics analyses across many\
\ samples into a single report.\nIt searches a given directory for analysis logs\
\ and compiles an HTML report. It's a general use tool, perfect for summarising the\
\ output from numerous bioinformatics tools.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info:
keywords:
- "QC"
- "html report"
- "aggregate analysis"
links:
homepage: "https://multiqc.info/"
documentation: "https://multiqc.info/docs/"
repository: "https://github.com/MultiQC/MultiQC"
references:
doi: "10.1093/bioinformatics/btw354"
licence: "GPL v3 or later"
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/biobox"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "multiqc --version | sed 's/multiqc, version\\s\\(.*\\)/multiqc: \"\\1\"/' >\
\ /var/software_versions.txt\n"
test_setup:
- type: "apt"
packages:
- "jq"
interactive: false
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/multiqc/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/multiqc"
executable: "target/executable/multiqc/multiqc"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1985
target/executable/multiqc/multiqc Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,424 @@
name: "pear"
version: "v0.2"
authors:
- name: "Kai Waldrant"
roles:
- "author"
- "maintainer"
info:
links:
email: "kai@data-intuitive.com"
github: "KaiWaldrant"
orcid: "0009-0003-8555-1361"
linkedin: "kaiwaldrant"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
- name: "Open Problems"
href: "https://openproblems.bio"
role: "Contributor"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--forward_fastq"
alternatives:
- "-f"
description: "Forward paired-end FASTQ file"
info: null
example:
- "forward.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reverse_fastq"
alternatives:
- "-r"
description: "Reverse paired-end FASTQ file"
info: null
example:
- "reverse.fastq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--assembled"
description: "The output file containing assembled reads. Can be compressed with\
\ gzip."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--unassembled_forward"
description: "The output file containing forward reads that could not be assembled.\
\ Can be compressed with gzip."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--unassembled_reverse"
description: "The output file containing reverse reads that could not be assembled.\
\ Can be compressed with gzip."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--discarded"
description: "The output file containing reads that were discarded due to too\
\ low quality or too many uncalled bases. Can be compressed with gzip."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Arguments"
arguments:
- type: "double"
name: "--p_value"
alternatives:
- "-p"
description: "Specify a p-value for the statistical test. If the computed p-value\
\ of a possible assembly exceeds the specified p-value then the paired-end read\
\ will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0.\
\ Setting 1.0 disables the test.\n"
info: null
example:
- 0.01
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_overlap"
alternatives:
- "-v"
description: "Specify the minimum overlap size. The minimum overlap may be set\
\ to 1 when the statistical test is used. However, further restricting the minimum\
\ overlap size to a proper value may reduce false-positive assemblies.\n"
info: null
example:
- 10
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_assembly_length"
alternatives:
- "-m"
description: "Specify the maximum possible length of the assembled sequences.\
\ Setting this value to 0 disables the restriction and assembled sequences may\
\ be arbitrarily long.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_assembly_length"
alternatives:
- "-n"
description: "Specify the minimum possible length of the assembled sequences.\
\ Setting this value to 0 disables the restriction and assembled sequences may\
\ be arbitrarily short.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_trim_length"
alternatives:
- "-t"
description: "Specify the minimum length of reads after trimming the low quality\
\ part (see option -q)\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--quality_threshold"
alternatives:
- "-q"
description: "Specify the quality threshold for trimming the low quality part\
\ of a read. If the quality scores of two consecutive bases are strictly less\
\ than the specified threshold, the rest of the read will be trimmed.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--max_uncalled_base"
alternatives:
- "-u"
description: "Specify the maximal proportion of uncalled bases in a read. Setting\
\ this value to 0 will cause PEAR to discard all reads containing uncalled bases.\
\ The other extreme setting is 1 which causes PEAR to process all reads independent\
\ of the number of uncalled bases.\n"
info: null
example:
- 1.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--test_method"
alternatives:
- "-g"
description: "Specify the type of statistical test. Two options are available.\
\ 1: Given the minimum allowed overlap, test using the highest OES. Note that\
\ due to its discrete nature, this test usually yields a lower p-value for the\
\ assembled read than the cut-off (specified by -p). For example, setting the\
\ cut-off to 0.05 using this test, the assembled reads might have an actual\
\ p-value of 0.02.\n2. Use the acceptance probability (m.a.p). This test method\
\ computes the same probability as test method 1. However, it assumes that the\
\ minimal overlap is the observed overlap with the highest OES, instead of the\
\ one specified by -v. Therefore, this is not a valid statistical test and the\
\ 'p-value' is in fact the maximal probability for accepting the assembly. Nevertheless,\
\ we observed in practice that for the case the actual overlap sizes are relatively\
\ small, test 2 can correctly assemble more reads with only slightly higher\
\ false-positive rate.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--emperical_freqs"
alternatives:
- "-e"
description: "Disable empirical base frequencies.\n"
info: null
direction: "input"
- type: "integer"
name: "--score_method"
alternatives:
- "-s"
description: "Specify the scoring method. 1. OES with +1 for match and -1 for\
\ mismatch. 2: Assembly score (AS). Use +1 for match and -1 for mismatch multiplied\
\ by base quality scores. 3: Ignore quality scores and use +1 for a match and\
\ -1 for a mismatch.\n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--phred_base"
alternatives:
- "-b"
description: "Base PHRED quality score.\n"
info: null
example:
- 33
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--cap"
alternatives:
- "-c"
description: "Specify the upper bound for the resulting quality score. If set\
\ to zero, capping is disabled.\n"
info: null
example:
- 40
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--nbase"
alternatives:
- "-z"
description: "When merging a base-pair that consists of two non-equal bases out\
\ of which none is degenerate, set the merged base to N and use the highest\
\ quality score of the two bases\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "PEAR is an ultrafast, memory-efficient and highly accurate pair-end\
\ read merger. It is fully parallelized and can run with as low as just a few kilobytes\
\ of memory.\n\nPEAR evaluates all possible paired-end read overlaps without\
\ requiring the target fragment size as input. In addition, it implements a statistical\
\ test for minimizing false-positive results. Together with a highly optimized implementation,\
\ it can merge millions of paired end reads within a couple of minutes on a standard\
\ desktop computer.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "pair-end"
- "read"
- "merge"
license: "CC-BY-NC-SA-3.0"
references:
doi:
- "10.1093/bioinformatics/btt593"
links:
repository: "https://github.com/tseemann/PEAR"
homepage: "https://cme.h-its.org/exelixis/web/software/pear"
documentation: "https://cme.h-its.org/exelixis/web/software/pear/doc.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/pear:0.9.6--h9d449c0_10"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "version=$(pear -h | grep 'PEAR v' | sed 's/PEAR v//' | sed 's/ .*//') && \\\
\necho \"pear: $version\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/pear/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/pear"
executable: "target/executable/pear/pear"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

1705
target/executable/pear/pear Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,290 @@
name: "qualimap_rnaseq"
namespace: "qualimap"
version: "v0.2"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--bam"
description: "Path to the sequence alignment file in BAM format, produced by a\
\ splicing-aware aligner."
info: null
example:
- "alignment.bam"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "Path to genomic annotations in Ensembl GTF format."
info: null
example:
- "annotations.gtf"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--qc_results"
description: "Text file containing the RNAseq QC results."
info: null
example:
- "rnaseq_qc_results.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts"
description: "Output file for computed counts."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--report"
description: "Report output file. Supported formats are PDF or HTML."
info: null
example:
- "report.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Optional"
arguments:
- type: "integer"
name: "--num_pr_bases"
description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\
\ bias (default = 100)."
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_tr_bias"
description: "Number of top highly expressed transcripts to compute 5'-3' bias\
\ (default = 1000)."
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--algorithm"
description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)."
info: null
required: false
choices:
- "uniquely-mapped-reads"
- "proportional"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sequencing_protocol"
description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\
\ or non-strand-specific (default))."
info: null
required: false
choices:
- "non-strand-specific"
- "strand-specific-reverse"
- "strand-specific-forward"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--paired"
description: "Setting this flag for paired-end experiments will result in counting\
\ fragments instead of reads."
info: null
direction: "input"
- type: "boolean_true"
name: "--sorted"
description: "Setting this flag indicates that the input file is already sorted\
\ by name. If flag is not set, additional sorting by name will be performed.\
\ Only required for paired-end analysis."
info: null
direction: "input"
- type: "string"
name: "--java_memory_size"
description: "maximum Java heap memory size, default = 4G."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Qualimap RNA-seq QC reports quality control metrics and bias estimations\
\ \nwhich are specific for whole transcriptome sequencing, including reads genomic\
\ \norigin, junction analysis, transcript coverage and 5-3 bias computation.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "RNA-seq"
- "quality control"
- "QC Report"
license: "GPL-2.0"
references:
doi:
- "10.1093/bioinformatics/btv566"
links:
repository: "https://bitbucket.org/kokonech/qualimap/commits/branch/master"
homepage: "http://qualimap.conesalab.org/"
documentation: "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc"
issue_tracker: "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/qualimap:2.3--hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/qualimap/qualimap_rnaseq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/qualimap/qualimap_rnaseq"
executable: "target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,442 @@
name: "rsem_prepare_reference"
namespace: "rsem"
version: "v0.2"
authors:
- name: "Sai Nirmayi Yasa"
roles:
- "author"
- "maintainer"
info:
links:
email: "nirmayi@data-intuitive.com"
github: "sainirmayi"
linkedin: "sai-nirmayi-yasa"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Junior Bioinformatics Researcher"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--reference_fasta_files"
description: "Semi-colon separated list of Multi-FASTA formatted files OR a directory\
\ name. If a directory name is specified, RSEM will read all files with suffix\
\ \".fa\" or \".fasta\" in this directory. The files should contain either the\
\ sequences of transcripts or an entire genome, depending on whether the '--gtf'\
\ option is used.\n"
info: null
example:
- "read1.fasta"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--reference_name"
description: "The name of the reference used. RSEM will generate several reference-related\
\ files that are prefixed by this name. This name can contain path information\
\ (e.g. '/ref/mm9').\n"
info: null
example:
- "/ref/mm9"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Directory containing reference files generated by RSEM."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Other options"
arguments:
- type: "file"
name: "--gtf"
description: "Assume that 'reference_fasta_files' contains the sequence of a genome,\
\ and extract transcript reference sequences using the gene annotations specified\
\ in the GTF file. If this and '--gff3' options are not provided, RSEM will\
\ assume 'reference_fasta_files' contains the reference transcripts. In this\
\ case, RSEM assumes that name of each sequence in the Multi-FASTA files is\
\ its transcript_id."
info: null
example:
- "annotations.gtf"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gff3"
description: "GFF3 annotation file. Converted to GTF format with the file name\
\ 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not\
\ exist."
info: null
example:
- "annotations.gff"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gff3_rna_patterns"
description: "List of transcript categories (separated by semi-colon). Only transcripts\
\ that match the string will be extracted."
info: null
example:
- "mRNA;rRNA"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--gff3_genes_as_transcripts"
description: "This option is designed for untypical organisms, such as viruses,\
\ whose GFF3 files only contain genes. RSEM will assume each gene as a unique\
\ transcript when it converts the GFF3 file into GTF format."
info: null
direction: "input"
- type: "string"
name: "--trusted_sources"
description: "List of trusted sources (separated by semi-colon). Only transcripts\
\ coming from these sources will be extracted. If this option is off, all sources\
\ are accepted."
info: null
example:
- "ENSEMBL;HAVANA"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--transcript_to_gene_map"
description: "Use information from this file to map from transcript (isoform)\
\ ids to gene ids. Each line of this file should be of the form: \n gene_id\
\ transcript_id\nwith the two fields separated by a tab character.\nIf you are\
\ using a GTF file for the \"UCSC Genes\" gene set from the UCSC Genome Browser,\
\ then the \"knownIsoforms.txt\" file (obtained from the \"Downloads\" section\
\ of the UCSC Genome Browser site) is of this format. \nIf this option is off,\
\ then the mapping of isoforms to genes depends on whether the '--gtf' option\
\ is specified. If '--gtf' is specified, then RSEM uses the \"gene_id\" and\
\ \"transcript_id\" attributes in the GTF file. Otherwise, RSEM assumes that\
\ each sequence in the reference sequence files is a separate gene.\n"
info: null
example:
- "isoforms.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--allele_to_gene_map"
description: "Use information from <file> to provide gene_id and transcript_id\
\ information for each allele-specific transcript. Each line of <file> should\
\ be of the form:\n gene_id transcript_id allele_id\nwith the fields separated\
\ by a tab character.\nThis option is designed for quantifying allele-specific\
\ expression. It is only valid if '--gtf' option is not specified. allele_id\
\ should be the sequence names presented in the Multi-FASTA-formatted files.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Flag: append poly(A) tails to every reference isoform. The tail length is
# taken from the separate --polyA_length argument declared just below.
- type: "boolean_true"
name: "--polyA"
description: "Add poly(A) tails to the end of all reference isoforms. The length\
\ of poly(A) tail added is specified by '--polyA_length' option. STAR aligner\
\ users may not want to use this option."
info: null
direction: "input"
- type: "integer"
name: "--polyA_length"
description: "The length of the poly(A) tails to be added."
info: null
example:
- 125
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--no_polyA_subset"
description: "Only meaningful if '--polyA' is specified. Do not add poly(A) tails\
\ to those transcripts listed in this file containing a list of transcript_ids."
info: null
example:
- "transcript_ids.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bowtie"
description: "Build Bowtie indices."
info: null
direction: "input"
- type: "boolean_true"
name: "--bowtie2"
description: "Build Bowtie 2 indices."
info: null
direction: "input"
- type: "boolean_true"
name: "--star"
description: "Build STAR indices."
info: null
direction: "input"
- type: "integer"
name: "--star_sjdboverhang"
description: "Length of the genomic sequence around annotated junction. It is\
\ only used for STAR to build splice junctions database and not needed for Bowtie\
\ or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According\
\ to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end\
\ reads, the ideal value is 101-1=100. In most cases, the default value of 100\
\ will work as well as the ideal value. (Default is 100)"
info: null
example:
- 100
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--hisat2_hca"
description: "Build HISAT2 indices on the transcriptome according to Human Cell\
\ Atlas (HCA) SMART-Seq2 pipeline."
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress the output of logging information."
info: null
direction: "input"
- name: "Prior-enhanced RSEM options"
arguments:
- type: "boolean_true"
name: "--prep_pRSEM"
description: "A Boolean indicating whether to prepare reference files for pRSEM,\
\ including building Bowtie indices for a genome and selecting training set\
\ isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced\
\ RSEM and the training set isoforms will be used for learning prior. A path\
\ to Bowtie executables and a mappability file in bigWig format are required\
\ when this option is on. Currently, Bowtie2 is not supported for prior-enhanced\
\ RSEM."
info: null
direction: "input"
- type: "file"
name: "--mappability_bigwig_file"
description: "Full path to a whole-genome mappability file in bigWig format. This\
\ file is required for running prior-enhanced RSEM. It is used for selecting\
\ a training set of isoforms for prior-learning. This file can be either downloaded\
\ from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One)."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "RSEM is a software package for estimating gene and isoform expression\
\ levels from RNA-Seq data. This component prepares transcript references for RSEM.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Transcriptome"
- "Index"
license: "GPL-3.0"
references:
doi:
- "10.1186/1471-2105-12-323"
links:
repository: "https://github.com/deweylab/RSEM"
homepage: "http://deweylab.github.io/RSEM"
documentation: "https://deweylab.github.io/RSEM/rsem-prepare-reference.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
# System packages installed through apt before the source builds below.
# Each list entry names exactly one package.
- type: "apt"
packages:
- "build-essential"
- "gcc"
- "g++"
- "make"
- "wget"
- "zlib1g-dev"
- "unzip"
- "xxd"
- "perl"
- "r-base"
- "bowtie2"
- "pip"
- "git"
interactive: false
# NOTE(review): this step installs the PyPI distribution named "bowtie" via pip.
# The Bowtie aligner binaries are separately downloaded and copied into
# /usr/local/bin by the docker setup step that follows, so this pip install may
# be unnecessary — confirm which package is actually intended.
- type: "python"
user: false
packages:
- "bowtie"
upgrade: true
# Image build step: sets the container timezone from $TZ, builds STAR, RSEM and
# HISAT2 from source, installs the prebuilt Bowtie linux-x86_64 binaries into
# /usr/local/bin, then cleans up. STAR, RSEM and Bowtie versions come from the
# env entries at the end of this step.
# NOTE(review): HISAT2 is obtained with a plain `git clone` and no tag or commit
# is checked out, so its version is not pinned — confirm this is intended.
# NOTE(review): the final cleanup references ${PACKAGES}, which is not among the
# env entries of this step; if it is unset the command reduces to
# `apt-get --purge autoremove -y` — confirm the intended package list.
# NOTE(review): the `rm -rf` cleanup does not list the RSEM source tree, the RSEM
# zip or the Bowtie zip under /tmp — confirm whether leaving them is intentional.
# NOTE(review): both wget calls pass --no-check-certificate, which disables TLS
# certificate verification for the downloads — confirm this is still required.
- type: "docker"
run:
- "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\
\ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\
\ && \\\nunzip ${STAR_VERSION}.zip && \\\ncd STAR-${STAR_VERSION}/source &&\
\ \\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\ncp STAR /usr/local/bin\
\ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip\
\ && \\\nunzip v${RSEM_VERSION}.zip && \\\ncd RSEM-${RSEM_VERSION} && \\\nmake\
\ && \\\nmake install && \\\ncd /tmp && \\\nwget --no-check-certificate -O bowtie-${BOWTIE_VERSION}-linux-x86_64.zip\
\ https://sourceforge.net/projects/bowtie-bio/files/bowtie/${BOWTIE_VERSION}/bowtie-${BOWTIE_VERSION}-linux-x86_64.zip/download\
\ && \\\nunzip bowtie-${BOWTIE_VERSION}-linux-x86_64.zip && \\\ncp bowtie-${BOWTIE_VERSION}-linux-x86_64/bowtie*\
\ /usr/local/bin && \\\ncd /tmp && \\\ngit clone https://github.com/DaehwanKimLab/hisat2.git\
\ /tmp/hisat2 && \\\ncd /tmp/hisat2 && \\\nmake && \\\ncp -r hisat2* /usr/local/bin\
\ && \\\ncd && \\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\
\ /tmp/bowtie-${BOWTIE_VERSION}-linux-x86_64 /tmp/hisat2 && \\\napt-get --purge\
\ autoremove -y ${PACKAGES} && \\\napt-get clean \n"
env:
- "STAR_VERSION=2.7.11b"
- "RSEM_VERSION=1.3.3"
- "BOWTIE_VERSION=1.3.1"
- "TZ=Europe/Brussels"
- type: "docker"
run:
- "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\
\ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\
\ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\
\ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\
\ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\
d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\
\ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/rsem/rsem_prepare_reference/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rsem/rsem_prepare_reference"
executable: "target/executable/rsem/rsem_prepare_reference/rsem_prepare_reference"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,303 @@
name: "salmon_index"
namespace: "salmon"
version: "v0.2"
authors:
- name: "Sai Nirmayi Yasa"
roles:
- "author"
- "maintainer"
info:
links:
email: "nirmayi@data-intuitive.com"
github: "sainirmayi"
linkedin: "sai-nirmayi-yasa"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Junior Bioinformatics Researcher"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--genome"
description: "Genome of the organism to prepare the set of decoy sequences. Required\
\ to build decoy-aware transcriptome.\n"
info: null
example:
- "genome.fasta"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--transcripts"
alternatives:
- "-t"
description: "Transcript fasta file.\n"
info: null
example:
- "transcriptome.fasta"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--kmer_len"
alternatives:
- "-k"
description: "The size of k-mers that should be used for the quasi index.\n"
info: null
example:
- 31
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--gencode"
description: "This flag will expect the input transcript fasta to be in GENCODE\
\ format, and will split the transcript name at the first '|' character. These\
\ reduced names will be used in the output and when looking for these transcripts\
\ in a gene to transcript GTF.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--features"
description: "This flag will expect the input reference to be in the tsv file\
\ format, and will split the feature name at the first 'tab' character. These\
\ reduced names will be used in the output and when looking for the sequence\
\ of the features.GTF.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_duplicates"
description: "This flag will disable the default indexing behavior of discarding\
\ sequence-identical duplicate transcripts. If this flag is passed, then duplicate\
\ transcripts that appear in the input will be retained and quantified separately.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--keep_fixed_fasta"
description: "Retain the fixed fasta file (without short transcripts and duplicates,\
\ clipped, etc.) generated during indexing.\n"
info: null
direction: "input"
- type: "integer"
name: "--filter_size"
alternatives:
- "-f"
description: "The size of the Bloom filter that will be used by TwoPaCo during\
\ indexing. The filter will be of size 2^{filter_size}. The default value of\
\ -1 means that the filter size will be automatically set based on the number\
\ of distinct k-mers in the input, as estimated by nthll.\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--sparse"
description: "Build the index using a sparse sampling of k-mer positions. This\
\ will require less memory (especially during quantification), but will take\
\ longer to construct and can slow down mapping / alignment.\n"
info: null
direction: "input"
- type: "file"
name: "--decoys"
alternatives:
- "-d"
description: "Treat these sequences ids from the reference as the decoys that\
\ may have sequence homologous to some known transcript. For example in case\
\ of the genome, provide a list of chromosome names (one per line).\n"
info: null
example:
- "decoys.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_clip"
description: "Don't clip poly-A tails from the ends of target sequences.\n"
info: null
direction: "input"
# Index type selector; per the description, "puff" is the only accepted value.
# NOTE(review): the short alias "-n" is attached to --type here, while upstream
# `salmon index` help appears to list -n as the short form of --no-clip —
# confirm the alias is intended on this argument.
- type: "string"
name: "--type"
alternatives:
- "-n"
description: "The type of index to build; the only option is \"puff\" in this\
\ version of salmon.\n"
info: null
example:
- "puff"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--index"
alternatives:
- "-i"
description: "Salmon index\n"
info: null
example:
- "Salmon_index"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Salmon is a tool for wicked-fast transcript quantification from RNA-seq\
\ data. It can either make use of pre-computed alignments (in the form of a SAM/BAM\
\ file) to the transcripts rather than the raw reads, or can be run in the mapping-based\
\ mode. This component creates a salmon index for the transcriptome to use Salmon\
\ in the mapping-based mode. It is generally recommended that you build a decoy-aware\
\ transcriptome file. This is done using the entire genome of the organism as the\
\ decoy sequence by concatenating the genome to the end of the transcriptome to\
\ be indexed and populating the decoys.txt file with the chromosome names.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Transcriptome"
- "Index"
license: "GPL-3.0"
references:
doi:
- "10.1038/nmeth.4197"
links:
repository: "https://github.com/COMBINE-lab/salmon"
homepage: "https://salmon.readthedocs.io/en/latest/salmon.html"
documentation: "https://salmon.readthedocs.io/en/latest/salmon.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/salmon:1.10.2--hecfa306_0"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "salmon index -v 2>&1 | sed 's/salmon \\([0-9.]*\\)/salmon: \\1/' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/salmon/salmon_index/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/salmon/salmon_index"
executable: "target/executable/salmon/salmon_index/salmon_index"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,290 @@
name: "samtools_collate"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "The input BAM file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reference"
description: "Reference sequence FASTA FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "The output filename."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--uncompressed"
alternatives:
- "-u"
description: "Output uncompressed BAM."
info: null
direction: "input"
- type: "boolean_true"
name: "--fast"
alternatives:
- "-f"
description: "Fast mode, only primary alignments."
info: null
direction: "input"
- type: "integer"
name: "--working_reads"
alternatives:
- "-r"
description: "Working reads stored (for use with -f)."
info: null
default:
- 10000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--compression"
alternatives:
- "-l"
description: "Compression level."
info: null
default:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--nb_tmp_files"
alternatives:
- "-n"
description: "Number of temporary files."
info: null
default:
- 64
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tmp_prefix"
alternatives:
- "-T"
description: "Write temporary files to PREFIX.nnnn.bam."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_pg"
description: "Do not add a PG line."
info: null
direction: "input"
- type: "string"
name: "--input_fmt_option"
description: "Specify a single input file format option in the form of OPTION\
\ or OPTION=VALUE."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt"
description: "Specify output format (SAM, BAM, CRAM)."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt_option"
description: "Specify a single output file format option in the form of OPTION\
\ or OPTION=VALUE."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Shuffles and groups reads in SAM/BAM/CRAM files together by their names."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "collate"
- "counts"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
# Upstream project links for samtools collate; the manual page is
# samtools-collate.html.
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-collate.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_collate/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_collate"
executable: "target/executable/samtools/samtools_collate/samtools_collate"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,269 @@
name: "samtools_faidx"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "FASTA input file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--length"
alternatives:
- "-n"
description: "Length for FASTA sequence line wrapping. If zero, this means do\
\ not\nline wrap. Defaults to the line length in the input file.\n"
info: null
default:
- 60
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--region_file"
alternatives:
- "-r"
description: "File of regions. Format is chr:from-to. One per line.\nMust be used\
\ with --output to avoid sending output to stdout.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--continue"
description: "Continue working if a non-existent region is requested.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--reverse_complement"
alternatives:
- "-i"
description: "Reverse complement sequences.\n"
info: null
direction: "input"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Write output to file.\n"
info: null
example:
- "output.fasta"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mark_strand"
description: "Add strand indicator to sequence name. Options are:\n[ rc, no, sign,\
\ custom,<pos>,<neg> ]\n"
info: null
default:
- "rc"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai_idx"
description: "Read/Write to specified index file (default file.fa.fai).\n"
info: null
example:
- "file.fa.fai"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gzi_idx"
description: "Read/Write to specified compressed file index (used with .gz files,\
\ default file.fa.gz.gzi).\n"
info: null
example:
- "file.fa.gz.gzi"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--fastq"
description: "Read FASTQ files and output extracted sequences in FASTQ format.\
\ Same as using samtools fqidx.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Indexes FASTA files to enable random access to fasta and fastq files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
# Search keywords for the samtools_faidx component.
keywords:
- "index"
- "fasta"
- "faidx"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-faidx.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_faidx/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_faidx"
executable: "target/executable/samtools/samtools_faidx/samtools_faidx"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,459 @@
name: "samtools_fasta"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "input SAM/BAM/CRAM file"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "output FASTA file"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--no_suffix"
alternatives:
- "-n"
description: "By default, either '/1' or '/2' is added to the end of read names\
\ where the corresponding \nREAD1 or READ2 FLAG bit is set. Using -n causes\
\ read names to be left as they are.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--suffix"
alternatives:
- "-N"
description: "Always add either '/1' or '/2' to the end of read names even when\
\ put into different files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--use_oq"
alternatives:
- "-O"
description: "Use quality values from OQ tags in preference to standard quality\
\ string if available.\n"
info: null
direction: "input"
# samtools writes this file (-s FILE), so it is declared as an output like
# --read1/--read2; as an input, must_exist would reject a not-yet-created path.
- type: "file"
name: "--singleton"
alternatives:
- "-s"
description: "write singleton reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--copy_tags"
alternatives:
- "-t"
description: "Copy RG, BC and QT tags to the FASTA header line, if they exist.\n"
info: null
direction: "input"
- type: "string"
name: "--copy_tags_list"
alternatives:
- "-T"
description: "Specify a comma-separated list of tags to copy to the FASTA header\
\ line, if they exist. \nTAGLIST can be blank or `*` to indicate all tags should\
\ be copied to the output. If using `*`, \nbe careful to quote it to avoid unwanted\
\ shell expansion.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--read1"
alternatives:
- "-1"
description: "Write reads with the READ1 FLAG set (and READ2 not set) to FILE\
\ instead of outputting them. \nIf the -s option is used, only paired reads\
\ will be written to this file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--read2"
alternatives:
- "-2"
description: "Write reads with the READ2 FLAG set (and READ1 not set) to FILE\
\ instead of outputting them. \nIf the -s option is used, only paired reads\
\ will be written to this file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_reads"
alternatives:
- "-o"
description: "Write reads with either READ1 FLAG or READ2 flag set to FILE instead\
\ of outputting them to stdout. \nThis is equivalent to -1 FILE -2 FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Short flag is "-0" (dash zero), matching the samtools fasta option.
- type: "file"
name: "--output_reads_both"
alternatives:
- "-0"
description: "Write reads where the READ1 and READ2 FLAG bits set are either both\
\ set or both unset to FILE \ninstead of outputting them.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--filter_flags"
alternatives:
- "-f"
description: "Only output alignments with all bits set in INT present in the FLAG\
\ field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0' \n(i.e. /^0[0-7]+/). Default: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--excl_flags"
alternatives:
- "-F"
description: "Do not output alignments with any bits set in INT present in the\
\ FLAG field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0'\n(i.e. /^0[0-7]+/). This defaults to 0x900\
\ representing filtering of secondary and \nsupplementary alignments. Default:\
\ `0x900`.\n"
info: null
example:
- "0x900"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--incl_flags"
alternatives:
- "--rf"
description: "Only output alignments with any bits set in INT present in the FLAG\
\ field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/),\
\ in octal by beginning with '0'\n(i.e. /^0[0-7]+/), as a decimal number not\
\ beginning with '0' or as a comma-separated list of \nflag names. Default:\
\ `0`.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--excl_flags_all"
alternatives:
- "-G"
description: "Only EXCLUDE reads with all of the bits set in INT present in the\
\ FLAG field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0' (i.e. /^0[0-7]+/).\nDefault: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aux_tag"
alternatives:
- "-d"
description: "Only output alignments containing an auxiliary tag matching both\
\ TAG and VAL. If VAL is omitted \nthen any value is accepted. The tag types\
\ supported are i, f, Z, A and H. \"B\" arrays are not \nsupported. This is\
\ comparable to the method used in samtools view --tag. The option may be specified\
\ \nmultiple times and is equivalent to using the --aux_tag_file option.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aux_tag_file"
alternatives:
- "-D"
description: "Only output alignments containing an auxiliary tag matching TAG\
\ and having a value listed in FILE. \nThe format of the file is one line per\
\ value. This is equivalent to specifying --aux_tag multiple times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--casava"
alternatives:
- "-i"
description: "add Illumina Casava 1.8 format entry to header (eg 1:N:0:ATCACG)"
info: null
direction: "input"
- type: "integer"
name: "--compression"
alternatives:
- "-c"
description: "set compression level when writing gz or bgzf fasta files."
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# samtools writes the first index reads to this file, so it is an output;
# as an input, must_exist would reject a not-yet-created path.
- type: "file"
name: "--index1"
alternatives:
- "--i1"
description: "write first index reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# samtools writes the second index reads to this file, so it is an output;
# as an input, must_exist would reject a not-yet-created path.
- type: "file"
name: "--index2"
alternatives:
- "--i2"
description: "write second index reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode_tag"
description: "Auxiliary tag to find index reads in. Default: `BC`.\n"
info: null
example:
- "BC"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_tag"
description: "Auxiliary tag to find index quality in. Default: `QT`.\n"
info: null
example:
- "QT"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Format string telling samtools how to split the barcode/quality tags into
# index reads.
- type: "string"
name: "--index_format"
description: "string to describe how to parse the barcode and quality tags. For\
\ example:\n* `i14i8`: the first 14 characters are index 1, the next 8 characters\
\ are index 2.\n* `n8i14`: ignore the first 8 characters, and use the next 14\
\ characters for index 1.\nIf the tag contains a separator, then the numeric\
\ part can be replaced with `*` to mean \n'read until the separator or end of\
\ tag', for example: `n*i*`.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts a SAM, BAM or CRAM to FASTA format."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "fasta"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-fasta.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_fasta/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_fasta"
executable: "target/executable/samtools/samtools_fasta/samtools_fasta"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,459 @@
name: "samtools_fastq"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "input SAM/BAM/CRAM file"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "output FASTQ file"
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--no_suffix"
alternatives:
- "-n"
description: "By default, either '/1' or '/2' is added to the end of read names\
\ where the corresponding \nREAD1 or READ2 FLAG bit is set. Using -n causes\
\ read names to be left as they are.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--suffix"
alternatives:
- "-N"
description: "Always add either '/1' or '/2' to the end of read names even when\
\ put into different files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--use_oq"
alternatives:
- "-O"
description: "Use quality values from OQ tags in preference to standard quality\
\ string if available.\n"
info: null
direction: "input"
# samtools writes this file (-s FILE), so it is declared as an output like
# --read1/--read2; as an input, must_exist would reject a not-yet-created path.
- type: "file"
name: "--singleton"
alternatives:
- "-s"
description: "write singleton reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--copy_tags"
alternatives:
- "-t"
description: "Copy RG, BC and QT tags to the FASTQ header line, if they exist.\n"
info: null
direction: "input"
- type: "string"
name: "--copy_tags_list"
alternatives:
- "-T"
description: "Specify a comma-separated list of tags to copy to the FASTQ header\
\ line, if they exist. \nTAGLIST can be blank or `*` to indicate all tags should\
\ be copied to the output. If using `*`, \nbe careful to quote it to avoid unwanted\
\ shell expansion.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--read1"
alternatives:
- "-1"
description: "Write reads with the READ1 FLAG set (and READ2 not set) to FILE\
\ instead of outputting them. \nIf the -s option is used, only paired reads\
\ will be written to this file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--read2"
alternatives:
- "-2"
description: "Write reads with the READ2 FLAG set (and READ1 not set) to FILE\
\ instead of outputting them. \nIf the -s option is used, only paired reads\
\ will be written to this file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_reads"
alternatives:
- "-o"
description: "Write reads with either READ1 FLAG or READ2 flag set to FILE instead\
\ of outputting them to stdout. \nThis is equivalent to -1 FILE -2 FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Short flag is "-0" (dash zero), matching the samtools fastq option.
- type: "file"
name: "--output_reads_both"
alternatives:
- "-0"
description: "Write reads where the READ1 and READ2 FLAG bits set are either both\
\ set or both unset to FILE \ninstead of outputting them.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--filter_flags"
alternatives:
- "-f"
description: "Only output alignments with all bits set in INT present in the FLAG\
\ field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0'\n(i.e. /^0[0-7]+/). Default: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--excl_flags"
alternatives:
- "-F"
description: "Do not output alignments with any bits set in INT present in the\
\ FLAG field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0' \n(i.e. /^0[0-7]+/). This defaults to 0x900\
\ representing filtering of secondary and \nsupplementary alignments. Default:\
\ `0x900`.\n"
info: null
example:
- "0x900"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--incl_flags"
alternatives:
- "--rf"
description: "Only output alignments with any bits set in INT present in the FLAG\
\ field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/),\
\ in octal by beginning with '0'\n(i.e. /^0[0-7]+/), as a decimal number not\
\ beginning with '0' or as a comma-separated list of \nflag names. Default:\
\ `0`.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--excl_flags_all"
alternatives:
- "-G"
description: "Only EXCLUDE reads with all of the bits set in INT present in the\
\ FLAG field. INT can be specified \nin hex by beginning with '0x' (i.e. /^0x[0-9A-F]+/)\
\ or in octal by beginning with '0' (i.e. /^0[0-7]+/).\nDefault: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aux_tag"
alternatives:
- "-d"
description: "Only output alignments containing an auxiliary tag matching both\
\ TAG and VAL. If VAL is omitted \nthen any value is accepted. The tag types\
\ supported are i, f, Z, A and H. \"B\" arrays are not \nsupported. This is\
\ comparable to the method used in samtools view --tag. The option may be specified\
\ \nmultiple times and is equivalent to using the --aux_tag_file option.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--aux_tag_file"
alternatives:
- "-D"
description: "Only output alignments containing an auxiliary tag matching TAG\
\ and having a value listed in FILE. \nThe format of the file is one line per\
\ value. This is equivalent to specifying --aux_tag multiple times.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--casava"
alternatives:
- "-i"
description: "Add Illumina Casava 1.8 format entry to header, for example: `1:N:0:ATCACG`.\n"
info: null
direction: "input"
- type: "integer"
name: "--compression"
alternatives:
- "-c"
description: "set compression level when writing gz or bgzf fastq files."
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--index1"
alternatives:
- "--i1"
description: "write first index reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--index2"
alternatives:
- "--i2"
description: "write second index reads to FILE."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode_tag"
description: "Auxiliary tag to find index reads in. Default: `BC`.\n"
info: null
example:
- "BC"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--quality_tag"
description: "Auxiliary tag to find index quality in. Default: `QT`.\n"
info: null
example:
- "QT"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--index_format"
description: "string to describe how to parse the barcode and quality tags. For\
\ example:\n* `i14i8`: the first 14 characters are index 1, the next 8 characters\
\ are index 2.\n* `n8i14`: ignore the first 8 characters, and use the next 14\
\ characters for index 1.\nIf the tag contains a separator, then the numeric\
\ part can be replaced with '*' to mean \n'read until the separator or end of\
\ tag', for example: `n*i*`.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Converts a SAM, BAM or CRAM to FASTQ format."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "fastq"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-fastq.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_fastq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_fastq"
executable: "target/executable/samtools/samtools_fastq/samtools_fastq"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,199 @@
name: "samtools_flagstat"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bam"
description: "BAM input files.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "BAM index file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "File containing samtools stats output.\n"
info: null
example:
- "output.flagstat"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Counts the number of alignments in SAM/BAM/CRAM files for each FLAG\
\ type."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "stats"
- "mapping"
- "counts"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-flagstat.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_flagstat/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_flagstat"
executable: "target/executable/samtools/samtools_flagstat/samtools_flagstat"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,209 @@
name: "samtools_idxstats"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--bam"
description: "BAM input file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "BAM index file."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fasta"
description: "Reference file the CRAM was created with (optional)."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "File containing samtools stats output in tab-delimited format.\n"
info: null
example:
- "output.idxstats"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Reports alignment summary statistics for a BAM file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "stats"
- "mapping"
- "counts"
- "chromosome"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-idxstats.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_idxstats/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_idxstats"
executable: "target/executable/samtools/samtools_idxstats/samtools_idxstats"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,215 @@
name: "samtools_index"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Input file name"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file name"
info: null
example:
- "out.bam.bai"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--bai"
alternatives:
- "-b"
description: "Generate BAM index"
info: null
direction: "input"
- type: "boolean_true"
name: "--csi"
alternatives:
- "-c"
description: "Create a CSI index for BAM files instead of the traditional BAI\
\ \nindex. This will be required for genomes with larger chromosome \nsizes.\n"
info: null
direction: "input"
- type: "integer"
name: "--min_shift"
alternatives:
- "-m"
description: "Create a CSI index, with a minimum interval size of 2^INT.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Index SAM/BAM/CRAM files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "index"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-index.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_index/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_index"
executable: "target/executable/samtools/samtools_index/samtools_index"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,358 @@
name: "samtools_sort"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "SAM/BAM/CRAM input file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Write final output to file.\n"
info: null
example:
- "out.bam"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt"
alternatives:
- "-O"
description: "Specify output format (SAM, BAM, CRAM).\n"
info: null
example:
- "BAM"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt_option"
description: "Specify a single output file format option in the form\nof OPTION\
\ or OPTION=VALUE.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reference"
description: "Reference sequence FASTA FILE.\n"
info: null
example:
- "ref.fa"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--write_index"
description: "Automatically index the output files.\n"
info: null
direction: "input"
- type: "string"
name: "--prefix"
alternatives:
- "-T"
description: "Write temporary files to PREFIX.nnnn.bam.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_PG"
description: "Do not add a PG line.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--template_coordinate"
description: "Sort by template-coordinate.\n"
info: null
direction: "input"
- type: "string"
name: "--input_fmt_option"
description: "Specify a single input file format option in the form\nof OPTION\
\ or OPTION=VALUE.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
  # Compression level for the sorted output.
  # NOTE(review): `default: 0` is the level this description calls
  # "uncompressed", and --uncompressed is documented as equivalent to
  # --compression 0. If script.sh always forwards this value, output is
  # uncompressed unless a level is given — confirm this default is intended.
  - type: "integer"
    name: "--compression"
    alternatives:
    - "-l"
    description: "Set compression level, from 0 (uncompressed) to 9 (best).\n"
    info: null
    default:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- type: "boolean_true"
name: "--uncompressed"
alternatives:
- "-u"
description: "Output uncompressed data (equivalent to --compression 0).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--minimiser"
alternatives:
- "-M"
description: "Use minimiser for clustering unaligned/unplaced reads.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--not_reverse"
alternatives:
- "-R"
description: "Do not use reverse strand (only compatible with --minimiser)\n"
info: null
direction: "input"
- type: "integer"
name: "--kmer_size"
alternatives:
- "-K"
description: "Kmer size to use for minimiser.\n"
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--order"
alternatives:
- "-I"
description: "Order minimisers by their position in FILE FASTA.\n"
info: null
example:
- "ref.fa"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--window"
alternatives:
- "-w"
description: "Window size for minimiser INDEXING VIA --order REF.FA.\n"
info: null
example:
- 100
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--homopolymers"
alternatives:
- "-H"
description: "Squash homopolymers when computing minimiser.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--natural_sort"
alternatives:
- "-n"
description: "Sort by read name (natural): cannot be used with samtools index.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ascii_sort"
alternatives:
- "-N"
description: "Sort by read name (ASCII): cannot be used with samtools index.\n"
info: null
direction: "input"
- type: "string"
name: "--tag"
alternatives:
- "-t"
description: "Sort by value of TAG. Uses position as secondary index \n(or read\
\ name if --natural_sort is set).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Sort SAM/BAM/CRAM file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "sort"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-sort.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_sort/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_sort"
executable: "target/executable/samtools/samtools_sort/samtools_sort"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,427 @@
name: "samtools_stats"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Input file.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "Index file.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fasta"
description: "Reference file the CRAM was created with.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--coverage"
alternatives:
- "-c"
description: "Coverage distribution min;max;step. Default: [1, 1000, 1].\n"
info: null
example:
- 1
- 1000
- 1
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "boolean_true"
name: "--remove_dups"
alternatives:
- "-d"
description: "Exclude from statistics reads marked as duplicates.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--customized_index_file"
alternatives:
- "-X"
description: "Use a customized index file.\n"
info: null
direction: "input"
# Required-flag filter (-f). The documented default is written as a plain `0`
# so it reads the same as the sibling --filtering_flag entry.
- type: "string"
name: "--required_flag"
alternatives:
- "-f"
description: "Required flag, 0 for unset. See also `samtools flags`. Default:\
\ `0`.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--filtering_flag"
alternatives:
- "-F"
description: "Filtering flag, 0 for unset. See also `samtools flags`. Default:\
\ `0`.\n"
info: null
example:
- "0"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--GC_depth"
description: "The size of GC-depth bins (decreasing bin size increases memory\
\ requirement). Default: `20000`.\n"
info: null
example:
- 20000.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--insert_size"
alternatives:
- "-i"
description: "Maximum insert size. Default: `8000`.\n"
info: null
example:
- 8000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--id"
alternatives:
- "-I"
description: "Include only listed read group or sample name.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--read_length"
alternatives:
- "-l"
description: "Include in the statistics only reads with the given read length.\
\ Default: `-1`.\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--most_inserts"
alternatives:
- "-m"
description: "Report only the main part of inserts. Default: `0.99`.\n"
info: null
example:
- 0.99
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--split_prefix"
alternatives:
- "-P"
description: "Path or string prefix for filepaths output by --split (default is\
\ input filename).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--trim_quality"
alternatives:
- "-q"
description: "The BWA trimming parameter. Default: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--ref_seq"
alternatives:
- "-r"
description: "Reference sequence (required for GC-depth and mismatches-per-cycle\
\ calculation).\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--split"
alternatives:
- "-S"
description: "Also write statistics to separate files split by tagged field.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--target_regions"
alternatives:
- "-t"
description: "Do stats in these regions only. Tab-delimited file chr,from,to,\
\ 1-based, inclusive.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--sparse"
alternatives:
- "-x"
description: "Suppress outputting IS rows where there are no insertions.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--remove_overlaps"
alternatives:
- "-p"
description: "Remove overlaps of paired-end reads from coverage and base count\
\ computations.\n"
info: null
direction: "input"
- type: "integer"
name: "--cov_threshold"
alternatives:
- "-g"
description: "Only bases with coverage above this value will be included in the\
\ target percentage computation. Default: `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--input_fmt_option"
description: "Specify a single input file format option in the form of OPTION\
\ or OPTION=VALUE.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reference"
description: "Reference sequence FASTA FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output file.\n"
info: null
example:
- "out.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Reports alignment summary statistics for a BAM file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "statistics"
- "counts"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-stats.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines for this component: a docker engine built on the
# biocontainers samtools image, and a native engine.
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
# The setup command keeps the `samtools ...` and `Using htslib ...` lines of
# `samtools --version`, drops the "Using " prefix, rewrites the trailing
# " <version>" as ": <version>", and writes the result to
# /var/software_versions.txt.
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_stats/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_stats"
executable: "target/executable/samtools/samtools_stats/samtools_stats"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,691 @@
name: "samtools_view"
namespace: "samtools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Input SAM, BAM, or CRAM file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fai_reference"
alternatives:
- "-t"
description: "A tab-delimited FILE. Each line must contain the reference name\
\ in the first column\nand the length of the reference in the second column,\
\ with one line for each distinct\nreference. Any additional fields beyond the\
\ second column are ignored. This file also\ndefines the order of the reference\
\ sequences in sorting. If you run: `samtools faidx <ref.fa>',\nthe resulting\
\ index file <ref.fa>.fai can be used as this FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--reference"
alternatives:
- "-T"
description: "A FASTA format reference FILE, optionally compressed by bgzip and\
\ ideally indexed by samtools faidx.\nIf an index is not present one will be\
\ generated for you, if the reference file is local.\nIf the reference file\
\ is not local, but is accessed instead via an https://, s3:// or other URL,\n\
the index file will need to be supplied by the server alongside the reference.\
\ It is possible to\nhave the reference and index files in different locations\
\ by supplying both to this option separated\nby the string \"##idx##\", for\
\ example:\n--reference ftp://x.com/ref.fa##idx##ftp://y.com/index.fa.fai\n\
However, note that only the location of the reference will be stored in the\
\ output file header.\nIf this method is used to make CRAM files, the cram reader\
\ may not be able to find the index,\nand may not be able to decode the file\
\ unless it can get the references it needs using a different\nmethod.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--target_file"
alternatives:
- "-L"
description: "Only output alignments overlapping the input BED FILE [null].\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--region_file"
description: "Use an index and multi-region iterator to only output alignments\
\ overlapping the input BED FILE.\nEquivalent to --use_index --target_file FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Read-name filter (-N): FILE lists read names to keep, or to drop when the
# FILE argument is prefixed with ^. The negated case also refers to read
# names (the text previously said "read groups").
- type: "file"
name: "--qname_file"
alternatives:
- "-N"
description: "Output only alignments with read names listed in FILE. If FILE starts\
\ with ^ then the operation is\nnegated and only outputs alignments with read\
\ names not listed in FILE. It is not permissible to mix\nboth the filter-in\
\ and filter-out style syntax in the same command.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Read-group filter (-R): FILE lists read groups to keep, or to drop when the
# FILE argument is prefixed with ^. The negated case also refers to read
# groups (the text previously said "read names").
- type: "file"
name: "--read_group_file"
alternatives:
- "-R"
description: "Output alignments in read groups listed in FILE [null]. If FILE\
\ starts with ^ then the operation is\nnegated and only outputs alignments with\
\ read groups not listed in FILE. It is not permissible to mix\nboth the filter-in\
\ and filter-out style syntax in the same command. Note that records with no\
\ RG tag\nwill also be output when using this option. This behaviour may change\
\ in a future release.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--use_index"
alternatives:
- "-M"
description: "Use the multi-region iterator on the union of a BED file and command-line\
\ region arguments.\nThis avoids re-reading the same regions of files so can\
\ sometimes be much faster. Note this also\nremoves duplicate sequences. Without\
\ this a sequence that overlaps multiple regions specified on\nthe command line\
\ will be reported multiple times. The usage of a BED file is optional and its\
\ path\nhas to be preceded by --target_file option.\n"
info: null
direction: "input"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "Output to FILE instead of [stdout]."
info: null
example:
- "output.bam"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bam"
alternatives:
- "-b"
description: "Output in the BAM format."
info: null
direction: "input"
- type: "boolean_true"
name: "--cram"
alternatives:
- "-C"
description: "Output in the CRAM format (requires --reference).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--fast"
description: "Enable fast compression. This also changes the default output format\
\ to BAM,\nbut this can be overridden by the explicit format options or using\
\ a filename\nwith a known suffix.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--uncompressed"
alternatives:
- "-u"
description: "Output uncompressed data. This also changes the default output format\
\ to BAM,\nbut this can be overridden by the explicit format options or using\
\ a filename\nwith a known suffix.\nThis option saves time spent on compression/decompression\
\ and is thus preferred\nwhen the output is piped to another samtools command.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--with_header"
description: "Include the header in the output.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--header_only"
alternatives:
- "-H"
description: "Output the header only.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_header"
description: "When producing SAM format, output alignment records but not headers.\n\
This is the default; the option can be used to reset the effect of \n--with_header/--header_only.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--count"
alternatives:
- "-c"
description: "Instead of printing the alignments, only count them and print the\
\ total number.\nAll filter options, such as --require_flags, --excl_flags,\
\ and --min_MQ, are taken\ninto account. The --unmap option is ignored in this\
\ mode.\n"
info: null
direction: "input"
# Destination (-U) for the alignments rejected by the filter options. The tool
# writes this file, so it is declared as an output rather than an input.
# NOTE(review): as an input with must_exist the generated wrapper is expected
# to require the file to exist before the run - confirm against viash.
- type: "file"
name: "--output_unselected"
alternatives:
- "-U"
description: "Write alignments that are not selected by the various filter options\
\ to FILE.\nWhen this option is used, all alignments (or all alignments intersecting\
\ the regions\nspecified) are written to either the output file or this file,\
\ but never both.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--unmap"
alternatives:
- "-p"
description: "Set the UNMAP flag on alignments that are not selected by the filter\
\ options.\nThese alignments are then written to the normal output. This is\
\ not compatible\nwith --output_unselected.\n"
info: null
direction: "input"
- type: "string"
name: "--read_group"
alternatives:
- "-r"
description: "Output alignments in read group STR [null]. Note that records with\
\ no RG tag will also be output\nwhen using this option. This behaviour may\
\ change in a future release.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tag"
alternatives:
- "-d"
description: "Only output alignments with tag STR1 and associated value STR2,\
\ which can be a string or an integer\n[null].\nThe value can be omitted, in\
\ which case only the tag is considered.\nNote that this option does not specify\
\ a tag type. For example, use --tag XX:42 to select alignments\nwith an XX:i:42\
\ field, not --tag XX:i:42.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--tag_file"
alternatives:
- "-D"
description: "Only output alignments with tag STR and associated values listed\
\ in FILE.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_MQ"
alternatives:
- "-q"
description: "Skip alignments with MAPQ smaller than INT.\n"
info: null
default:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--library"
alternatives:
- "-l"
description: "Only output alignments in library STR.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_qlen"
alternatives:
- "-m"
description: "Only output alignments with number of CIGAR bases consuming query\
\ sequence >= INT.\n"
info: null
default:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Filter-expression option (-e). The description points to where the
# expression syntax is documented (the sentence was previously cut short).
- type: "string"
name: "--expr"
alternatives:
- "-e"
description: "Only include alignments that match the filter expression STR. The\
\ syntax for these expressions is\ndescribed in the main samtools man page under\
\ the FILTER EXPRESSIONS heading.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--require_flags"
alternatives:
- "-f"
description: "Only output alignments with all bits set in FLAG present in the\
\ FLAG field. FLAG can be specified\nin hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/),\
\ in octal by beginning with `0' (i.e. /^0[0-7]+/),\nas a decimal number not\
\ beginning with '0' or as a comma-separated list of flag names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--excl_flags"
alternatives:
- "-F"
description: "Do not output alignments with any bits set in FLAG present in the\
\ FLAG field. FLAG can be specified\nin hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/),\
\ in octal by beginning with `0' (i.e. /^0[0-7]+/),\nas a decimal number not\
\ beginning with '0' or as a comma-separated list of flag names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Exclude alignments that have ALL of the given FLAG bits set (-G).
# Typed as a string like the sibling --require_flags / --excl_flags /
# --incl_flags options, because the description allows hex, octal and
# comma-separated flag names, which an integer argument cannot carry.
- type: "string"
name: "--excl_all_flags"
alternatives:
- "-G"
description: "Do not output alignments with all bits set in FLAG present in the\
\ FLAG field. This is the opposite of\n--require_flags such that --require_flags\
\ 12 --excl_all_flags 12 is the same as no filtering at all.\nFLAG can be\
\ specified in hex by beginning with `0x' (i.e. /^0x[0-9A-F]+/), in octal by\
\ beginning with `0'\n(i.e. /^0[0-7]+/), as a decimal number not beginning with\
\ '0' or as a comma-separated list of flag names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--incl_flags"
alternatives:
- "--rf"
description: "Only output alignments with any bit set in FLAG present in the FLAG\
\ field. FLAG can be specified in hex\nby beginning with `0x' (i.e. /^0x[0-9A-F]+/),\
\ in octal by beginning with `0' (i.e. /^0[0-7]+/), as a decimal\nnumber not\
\ beginning with '0' or as a comma-separated list of flag names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--remove_tag"
alternatives:
- "-x"
description: "Read tag(s) to exclude from output (repeatable) [null]. This can\
\ be a single tag or a comma separated list.\nAlternatively the option itself\
\ can be repeated multiple times.\nIf the list starts with a `^' then it is\
\ negated and treated as a request to remove all tags except those in STR.\n\
The list may be empty, so --remove_tag ^ will remove all tags.\nNote that tags\
\ will only be removed from reads that pass filtering.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--keep_tag"
description: "This keeps only tags listed in STR and is directly equivalent to\
\ --remove_tag ^STR. Specifying an empty list\nwill remove all tags. If both\
\ --keep_tag and --remove_tag are specified then --keep_tag has precedence.\n\
Note that tags will only be removed from reads that pass filtering.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--remove_B"
alternatives:
- "-B"
description: "Collapse the backward CIGAR operation.\n"
info: null
direction: "input"
- type: "string"
name: "--add_flags"
description: "Adds flag(s) to read. FLAG can be specified in hex by beginning\
\ with `0x' (i.e. /^0x[0-9A-F]+/), in octal\nby beginning with `0' (i.e. /^0[0-7]+/),\
\ as a decimal number not beginning with '0' or as a comma-separated\nlist of\
\ flag names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--remove_flags"
description: "Remove flag(s) from read. FLAG is specified in the same way as with\
\ the --add_flags option.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--subsample"
description: "Output only a proportion of the input alignments, as specified by\
\ 0.0 <= FLOAT <= 1.0, which gives the fraction\nof templates/pairs to be kept.\
\ This subsampling acts in the same way on all of the alignment records in the\
\ same\ntemplate or read pair, so it never keeps a read but not its mate.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--subsample_seed"
description: "Subsampling seed used to influence which subset of reads is kept.\
\ When subsampling data that has previously\nbeen subsampled, be sure to use\
\ a different seed value from those used previously; otherwise more reads will\n\
be retained than expected.\n"
info: null
default:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Pair-fetching switch (-P). The description refers to sibling options by the
# names this component actually exposes (--use_index, --output_unselected).
- type: "boolean_true"
name: "--fetch_pairs"
alternatives:
- "-P"
description: "Retrieve pairs even when the mate is outside of the requested region.\
\ Enabling this option also turns on the\nmulti-region iterator (--use_index).\
\ A region to search must be specified, either on the command-line, or using\
\ the\n--target_file option. The input file must be an indexed regular file.\n\
This option first scans the requested region, using the RNEXT and PNEXT fields\
\ of the records that have the\nPAIRED flag set and pass other filtering options\
\ to find where paired reads are located. These locations are\nused to build\
\ an expanded region list, and a set of QNAMEs to allow from the new regions.\
\ It will then make\na second pass, collecting all reads from the originally-specified\
\ region list together with reads from additional\nlocations that match the\
\ allowed set of QNAMEs. Any other filtering options used will be applied to\
\ all reads\nfound during this second pass.\nAs this option links reads using\
\ RNEXT and PNEXT, it is important that these fields are set accurately. Use\n\
'samtools fixmate' to correct them if necessary.\nNote that this option does\
\ not work with the --count, --output_unselected or --unmap options.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--customized_index"
alternatives:
- "-X"
description: "Include customized index file as a part of arguments. See EXAMPLES\
\ section for sample of usage.\n"
info: null
direction: "input"
# Sanitizing option (-z): takes a comma-separated list of the keywords listed
# in the description. The text is cleaned up (duplicated phrase removed, the
# "on" keyword quoted, one keyword per line); the keyword set is unchanged.
- type: "string"
name: "--sanitize"
alternatives:
- "-z"
description: "Perform some sanity checks on the state of SAM record fields, fixing\
\ up common mistakes made by aligners.\nThese include soft-clipping alignments\
\ when they extend beyond the end of the reference, marking records as\nunmapped\
\ when they have reference * or position 0, and ensuring unmapped alignments\
\ have no CIGAR or mapping\nquality and no MD, NM, CG or SM tags.\nFLAGs is\
\ a comma-separated list of keywords chosen from the following list.\n\nunmap:\
\ The UNMAPPED BAM flag. This is set for reads with position <= 0, reference\
\ name \"*\" or reads starting\nbeyond the end of the reference. Note CIGAR\
\ \"*\" is permitted for mapped data so does not trigger this.\npos: Position\
\ and reference name fields. These may be cleared when a sequence is unmapped\
\ due to the\ncoordinates being beyond the end of the reference. Selecting this\
\ may change the sort order of the file,\nso it is not a part of the \"on\"\
\ compound argument.\nmqual: Mapping quality. This is set to zero for unmapped\
\ reads.\ncigar: Modifies CIGAR fields, either by adding soft-clips for reads\
\ that overlap the end of the reference or\nby clearing it for unmapped reads.\n\
aux: For unmapped data, some auxiliary fields are meaningless and will be removed.\
\ These include NM, MD, CG and SM.\noff: Perform no sanity fixing. This is the\
\ default.\non: Sanitize data in a way that guarantees the same sort order.\
\ This is everything except for pos.\nall: All sanitizing options, including\
\ pos.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--no_PG"
description: "Do not add a @PG line to the header of the output file.\n"
info: null
direction: "input"
- type: "string"
name: "--input_fmt_option"
description: "Specify a single input file format option in the form of OPTION\
\ or OPTION=VALUE.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt"
alternatives:
- "-O"
description: "Specify output format (SAM, BAM, CRAM).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_fmt_option"
description: "Specify a single output file format option in the form of OPTION\
\ or OPTION=VALUE.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--write_index"
description: "Automatically index the output files.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Views and converts SAM/BAM/CRAM files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "view"
- "convert"
- "bam"
- "sam"
- "cram"
license: "MIT/Expat"
references:
doi:
- "10.1093/bioinformatics/btp352"
- "10.1093/gigascience/giab008"
links:
repository: "https://github.com/samtools/samtools"
homepage: "https://www.htslib.org/"
documentation: "https://www.htslib.org/doc/samtools-view.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines for this component: a docker engine built on the
# biocontainers samtools image, and a native engine.
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
# The setup command keeps the `samtools ...` and `Using htslib ...` lines of
# `samtools --version`, drops the "Using " prefix, rewrites the trailing
# " <version>" as ": <version>", and writes the result to
# /var/software_versions.txt.
run:
- "samtools --version 2>&1 | grep -E '^(samtools|Using htslib)' | \\\nsed 's#Using\
\ ##;s# \\([0-9\\.]*\\)$#: \\1#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/samtools/samtools_view/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/samtools/samtools_view"
executable: "target/executable/samtools/samtools_view/samtools_view"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,199 @@
name: "seqtk_sample"
namespace: "seqtk"
version: "v0.2"
authors:
- name: "Jakub Majercik"
roles:
- "author"
- "maintainer"
info:
links:
email: "jakub@data-intuitive.com"
github: "jakubmajercik"
linkedin: "jakubmajercik"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatics Engineer"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "The input FASTA/Q file."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output FASTA/Q file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--seed"
description: "Seed for random generator."
info: null
example:
- 42
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--fraction_number"
description: "Fraction or number of sequences to sample."
info: null
example:
- 0.1
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--two_pass_mode"
description: "Twice as slow but with much reduced memory"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Subsamples sequences from FASTA/Q files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "sample"
- "FASTA"
- "FASTQ"
license: "MIT"
links:
repository: "https://github.com/lh3/seqtk/tree/v1.4"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/seqtk:1.4--he4a0461_2"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/seqtk/seqtk_sample/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/seqtk/seqtk_sample"
executable: "target/executable/seqtk/seqtk_sample/seqtk_sample"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,222 @@
name: "seqtk_subseq"
namespace: "seqtk"
version: "v0.2"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "The input FASTA/Q file."
info: null
example:
- "input.fa"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--name_list"
description: "List of sequence names (name.lst) or genomic regions (reg.bed) to\
\ extract.\n"
info: null
example:
- "list.lst"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "-o"
description: "The output FASTA/Q file."
info: null
default:
- "output.fa"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--tab"
alternatives:
- "-t"
description: "TAB delimited output."
info: null
direction: "input"
- type: "boolean_true"
name: "--strand_aware"
alternatives:
- "-s"
description: "Strand aware."
info: null
direction: "input"
- type: "integer"
name: "--sequence_line_length"
alternatives:
- "-l"
description: "Sequence line length of input fasta file. Default: 0.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Extract subsequences from FASTA/Q files. Takes as input a FASTA/Q file\
\ and a name.lst (sequence ids file) or a reg.bed (genomic regions file).\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "subseq"
- "FASTA"
- "FASTQ"
license: "MIT"
links:
repository: "https://github.com/lh3/seqtk/tree/v1.4"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "quay.io/biocontainers/seqtk:1.4--he4a0461_2"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "echo $(echo $(seqtk 2>&1) | sed -n 's/.*\\(Version: [^ ]*\\).*/\\1/p') > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/seqtk/seqtk_subseq/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/seqtk/seqtk_subseq"
executable: "target/executable/seqtk/seqtk_subseq/seqtk_subseq"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,359 @@
name: "star_genome_generate"
namespace: "star"
version: "v0.2"
authors:
- name: "Sai Nirmayi Yasa"
roles:
- "author"
- "maintainer"
info:
links:
email: "nirmayi@data-intuitive.com"
github: "sainirmayi"
linkedin: "sai-nirmayi-yasa"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Junior Bioinformatics Researcher"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--genome_fasta_files"
description: "Path(s) to the fasta files with the genome sequences, separated\
\ by spaces. These files should be plain text FASTA files, they *cannot* be\
\ zipped.\n"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--sjdb_gtf_file"
description: "Path to the GTF file with annotations"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--sjdb_overhang"
description: "Length of the donor/acceptor sequence on each side of the junctions,\
\ ideally = (mate_length - 1)"
info: null
example:
- 100
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_chr_prefix"
description: "Prefix for chromosome names in a GTF file (e.g. 'chr' for using\
\ ENSEMBL annotations with UCSC genomes)"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_feature_exon"
description: "Feature type in GTF file to be used as exons for building transcripts"
info: null
example:
- "exon"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_tag_exon_parent_transcript"
description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\
\ works for GTF files)"
info: null
example:
- "transcript_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_tag_exon_parent_gene"
description: "GTF attribute name for parent gene ID (default \"gene_id\" works\
\ for GTF files)"
info: null
example:
- "gene_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_tag_exon_parent_gene_name"
description: "GTF attribute name for parent gene name"
info: null
example:
- "gene_name"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--sjdb_gtf_tag_exon_parent_gene_type"
description: "GTF attribute name for parent gene type"
info: null
example:
- "gene_type"
- "gene_biotype"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "long"
name: "--limit_genome_generate_ram"
description: "Maximum available RAM (bytes) for genome generation"
info: null
example:
- 31000000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--genome_sa_index_nbases"
description: "Length (bases) of the SA pre-indexing string. Typically between\
\ 10 and 15. Longer strings will use much more memory, but allow faster searches.\
\ For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2\
\ - 1)."
info: null
example:
- 14
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--genome_chr_bin_nbits"
description: "Defined as log2(chrBin), where chrBin is the size of the bins for\
\ genome storage. Each chromosome will occupy an integer number of bins. For\
\ a genome with large number of contigs, it is recommended to scale this parameter\
\ as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)])."
info: null
example:
- 18
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--genome_sa_sparse_d"
description: "Suffix array sparsity, i.e. distance between indices. Use bigger\
\ numbers to decrease needed RAM at the cost of mapping speed reduction."
info: null
example:
- 1
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--genome_suffix_length_max"
description: "Maximum length of the suffixes, has to be longer than read length.\
\ Use -1 for infinite length."
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--genome_transform_type"
description: "Type of genome transformation\n None ... no transformation\n\
\ Haploid ... replace reference alleles with alternative alleles from VCF\
\ file (e.g. consensus allele)\n Diploid ... create two haplotypes for each\
\ chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing\
\ (e.g. personal genome)\n"
info: null
example:
- "None"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome_transform_vcf"
description: "path to VCF file for genome transformation"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--index"
description: "STAR index directory."
info: null
default:
- "STAR_index"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Create index for STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "genome"
- "index"
- "align"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/bts635"
links:
repository: "https://github.com/alexdobin/STAR"
documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt-get update && \\\n apt-get install -y --no-install-recommends ${PACKAGES}\
\ && \\\n cd /tmp && \\\n wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\
\ && \\\n unzip ${STAR_VERSION}.zip && \\\n cd STAR-${STAR_VERSION}/source\
\ && \\\n make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n cp STAR /usr/local/bin\
\ && \\\n cd / && \\\n rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\
\ && \\\n apt-get --purge autoremove -y ${PACKAGES} && \\\n apt-get clean\n"
env:
- "STAR_VERSION 2.7.11b"
- "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd"
- type: "docker"
run:
- "STAR --version | sed 's#\\(.*\\)#star: \"\\1\"#' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/star/star_genome_generate/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/star/star_genome_generate"
executable: "target/executable/star/star_genome_generate/star_genome_generate"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,637 @@
name: "umi_tools_dedup"
namespace: "umi_tools"
version: "v0.2"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "--stdin"
description: "Input BAM or SAM file. Use --in_sam to specify SAM format."
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--in_sam"
description: "By default, inputs are assumed to be in BAM format. Use this option\
\ to specify the use of SAM\nformat for input.\n"
info: null
direction: "input"
- type: "file"
name: "--bai"
description: "BAM index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--random_seed"
description: "Random seed to initialize number generator with."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
- "--stdout"
description: "Deduplicated BAM file."
info: null
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--out_sam"
description: "By default, outputs are written in BAM format. Use this option\
\ to specify the use of SAM format\nfor output.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--paired"
description: "BAM is paired end - output both read pairs. This will also force\
\ the use of the template length\nto determine reads with the same mapping coordinates.\n"
info: null
direction: "input"
- type: "string"
name: "--output_stats"
description: "Generate files containing UMI based deduplication statistics files\
\ with this prefix in the file names.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extract_umi_method"
description: "Specify the method by which the barcodes were encoded in the read.\n\
The options are:\n * read_id (default) \n * tag\n * umis\n"
info: null
example:
- "read_id"
required: false
choices:
- "read_id"
- "tag"
- "umis"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umi_tag"
description: "The tag containing the UMI sequence. This is only required if the\
\ extract_umi_method is set to tag.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umi_separator"
description: "The separator used to separate the UMI from the read sequence. This\
\ is only required if the\nextract_umi_method is set to read_id. Default: `_`.\n"
info: null
example:
- "_"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umi_tag_split"
description: "Separate the UMI in tag by <SPLIT> and take the first element."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--umi_tag_delimiter"
description: "Separate the UMI in tag by <DELIMITER> and concatenate the elements."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--cell_tag"
description: "The tag containing the cell barcode sequence. This is only required\
\ if the extract_umi_method\nis set to tag.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--cell_tag_split"
description: "Separate the cell barcode in tag by <SPLIT> and take the first element."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--cell_tag_delimiter"
description: "Separate the cell barcode in tag by <DELIMITER> and concatenate the\
\ elements."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Grouping Options"
arguments:
- type: "string"
name: "--method"
description: "The method to use for grouping reads. \nThe options are: \n * unique\n\
\ * percentile\n * cluster\n * adjacency\n * directional (default)\n"
info: null
example:
- "directional"
required: false
choices:
- "unique"
- "percentile"
- "cluster"
- "adjacency"
- "directional"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--edit_distance_threshold"
description: "For the adjacency and cluster methods the threshold for the edit\
\ distance to connect two\nUMIs in the network can be increased. The default\
\ value of 1 works best unless the UMI is\nvery long (>14bp). Default: `1`.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--spliced_is_unique"
description: "Causes two reads that start in the same position on the same strand\
\ and having the same UMI\nto be considered unique if one is spliced and the\
\ other is not. (Uses the 'N' cigar operation\nto test for splicing).\n"
info: null
direction: "input"
- type: "integer"
name: "--soft_clip_threshold"
description: "Mappers that soft clip will sometimes do so rather than mapping\
\ a spliced read if there is only\na small overhang over the exon junction.\
\ By setting this option, you can treat reads with at\nleast this many bases\
\ soft-clipped at the 3' end as spliced. Default: `4`.\n"
info: null
example:
- 4
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--multimapping_detection_method"
description: "If the sam/bam contains tags to identify multimapping reads, you\
\ can specify for use when selecting\nthe best read at a given loci. Supported\
\ tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest\
\ mapping quality will be selected.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--read_length"
description: "Use the read length as a criteria when deduping, for e.g. sRNA-Seq."
info: null
direction: "input"
- name: "Single-cell RNA-Seq Options"
arguments:
- type: "boolean_true"
name: "--per_gene"
description: "Reads will be grouped together if they have the same gene. This\
\ is useful if your library prep\ngenerates PCR duplicates with non identical\
\ alignment positions such as CEL-Seq. Note this option\nis hardcoded to be\
\ on with the count command. I.e. counting is always performed per-gene. Must\
\ be\ncombined with either --gene_tag or --per_contig option.\n"
info: null
direction: "input"
- type: "string"
name: "--gene_tag"
description: "Deduplicate per gene. The gene information is encoded in the bam\
\ read tag specified.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--assigned_status_tag"
description: "BAM tag which describes whether a read is assigned to a gene. Defaults\
\ to the same value as given\nfor --gene_tag.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--skip_tags_regex"
description: "Use in conjunction with the --assigned_status_tag option to skip\
\ any reads where the tag matches\nthis regex. Default (\"^[__|Unassigned]\"\
) matches anything which starts with \"__\" or \"Unassigned\".\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--per_contig"
description: "Deduplicate per contig (field 3 in BAM; RNAME). All reads with the\
\ same contig will be considered to\nhave the same alignment position. This is\
\ useful if you have aligned to a reference transcriptome\nwith one transcript\
\ per gene. If you have aligned to a transcriptome with more than one transcript\n\
per gene, you can supply a map between transcripts and gene using the --gene_transcript_map\
\ option.\n"
info: null
direction: "input"
- type: "file"
name: "--gene_transcript_map"
description: "A file containing a mapping between gene names and transcript names.\
\ The file should be tab\nseparated with the gene name in the first column and\
\ the transcript name in the second column.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--per_cell"
description: "Reads will only be grouped together if they have the same cell barcode.\
\ Can be combined with\n--per_gene.\n"
info: null
direction: "input"
- name: "SAM/BAM Options"
arguments:
- type: "integer"
name: "--mapping_quality"
description: "Minimum mapping quality (MAPQ) for a read to be retained. Default:\
\ `0`.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--unmapped_reads"
description: "How unmapped reads should be handled. \nThe options are:\n * \"\
discard\": Discard all unmapped reads. (default)\n * \"use\": If read2\
\ is unmapped, deduplicate using read1 only. Requires --paired.\n * \"output\"\
: Output unmapped reads/read pairs without UMI grouping/deduplication. Only\
\ available in umi_tools group.\n"
info: null
example:
- "discard"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--chimeric_pairs"
description: "How chimeric pairs should be handled. \nThe options are:\n * \"\
discard\": Discard all chimeric read pairs.\n * \"use\": Deduplicate using\
\ read1 only. (default)\n * \"output\": Output chimeric pairs without UMI\
\ grouping/deduplication. Only available in\n umi_tools group.\n"
info: null
example:
- "use"
required: false
choices:
- "discard"
- "use"
- "output"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--unpaired_reads"
description: "How unpaired reads should be handled. \nThe options are: \n * \"\
discard\": Discard all unpaired reads.\n * \"use\": Deduplicate using read1 only.\
\ (default)\n * \"output\": Output unpaired reads without UMI\
\ grouping/deduplication. Only available\n \
\ in umi_tools group.\n"
info: null
example:
- "use"
required: false
choices:
- "discard"
- "use"
- "output"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--ignore_umi"
description: "Ignore the UMI and group reads using mapping coordinates only."
info: null
direction: "input"
- type: "double"
name: "--subset"
description: "Only consider a fraction of the reads, chosen at random. This is\
\ useful for doing saturation\nanalyses.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--chrom"
description: "Only consider a single chromosome. This is useful for debugging/testing\
\ purposes."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Group/Dedup Options"
arguments:
- type: "boolean_true"
name: "--no_sort_output"
description: "By default, output is sorted. This involves the use of a temporary\
\ unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--buffer_whole_contig"
description: "Forces dedup to parse an entire contig before yielding any reads\
\ for deduplication. This is the\nonly way to absolutely guarantee that all\
\ reads with the same start position are grouped together\nfor deduplication\
\ since dedup uses the start position of the read, not the alignment coordinate\
\ on\nwhich the reads are sorted. However, by default, dedup reads for another\
\ 1000bp before outputting\nread groups which will avoid any reads being missed\
\ with short read sequencing (<1000bp).\n"
info: null
direction: "input"
# Argument group with generic logging, timing and housekeeping options.
- name: "Common Options"
  arguments:
  # The log file is written by the tool, so it is declared as an output.
  # Declared as an input, the must_exist check would require the file to
  # exist before the run. The same fix should be applied to the source
  # config this file is generated from.
  - type: "file"
    name: "--log"
    alternatives:
    - "-L"
    description: "File with logging information."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--log2stderr"
    description: "Send logging information to stderr."
    info: null
    direction: "input"
  - type: "integer"
    name: "--verbose"
    alternatives:
    - "-v"
    description: "Log level. The higher, the more output. Default: `0`.\n"
    info: null
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # The error file is written by the tool: output, same reasoning as --log.
  - type: "file"
    name: "--error"
    alternatives:
    - "-E"
    description: "File with error information."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--temp_dir"
    description: "Directory for temporary files. If not set, the bash environmental\
      \ variable TMPDIR is used.\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--compresslevel"
    description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\
      \ than python gzip default.\nDefault: `6`.\n"
    info: null
    example:
    - 6
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Timing information is stored in this file by the tool: output as well.
  - type: "file"
    name: "--timeit"
    description: "Store timing information in file."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--timeit_name"
    description: "Name in timing file for this class of jobs. Default: `all`.\n"
    info: null
    example:
    - "all"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # NOTE(review): the description reads like an on/off switch; confirm
  # against the upstream CLI that a string value is really expected here.
  - type: "string"
    name: "--timeit_header"
    description: "Add header for timing information."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Component resources: a single executable bash script.
resources:
- type: 'bash_script'
  path: 'script.sh'
  is_executable: true
# One-line summary of the component (keeps its single trailing newline).
description: |
  Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
# Test resources: an executable test script plus a test data path.
test_resources:
- type: 'bash_script'
  path: 'test.sh'
  is_executable: true
- type: 'file'
  path: 'test_data'
# Component metadata: status, required commands, keywords, license,
# literature reference and upstream project links.
# NOTE(review): indentation is missing in this view, so the nesting of the
# keys below cannot be confirmed from here; lines are left untouched.
info: null
status: "enabled"
# Commands the component requires to be available.
requirements:
commands:
- "ps"
keywords:
- "umi_tools"
- "deduplication"
- "dedup"
license: "MIT"
# DOI of the publication to cite.
references:
doi:
- "10.1101/gr.209601.116"
# Upstream repository, homepage and documentation of the wrapped tool.
links:
repository: "https://github.com/CGATOxford/UMI-tools"
homepage: "https://umi-tools.readthedocs.io/en/latest/"
documentation: "https://umi-tools.readthedocs.io/en/latest/reference/dedup.html"
# Runner definitions: one of type "executable" and one of type "nextflow".
# NOTE(review): indentation is missing in this view, so the nesting of the
# keys below cannot be confirmed from here; lines are left untouched.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Label name -> resource setting string.
labels:
# Memory labels in decimal units (mem1gb = 1000000000 bytes).
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
# Memory labels in binary units (mem1gib = 1073741824 bytes).
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
# CPU-count labels.
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engine definitions: a docker engine and a native engine.
# NOTE(review): indentation is missing in this view, so the nesting of the
# keys below cannot be confirmed from here; lines are left untouched.
engines:
- type: "docker"
id: "docker"
# Base image: biocontainers build of umi_tools 1.1.5.
image: "quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1"
target_registry: "images.viash-hub.com"
target_tag: "v0.2"
namespace_separator: "/"
setup:
- type: "docker"
run:
# Writes the umi_tools version string, with " version" removed, to
# /var/software_versions.txt.
- "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance: source config path, runner and engine used, output
# location, viash version, and the git commit/remote of the source.
build_info:
config: "src/umi_tools/umi_tools_dedup/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/umi_tools/umi_tools_dedup"
executable: "target/executable/umi_tools/umi_tools_dedup/umi_tools_dedup"
viash_version: "0.9.0-RC7"
git_commit: "f22ab0eab58dcd6bff89d8d73fe951953ff1260f"
git_remote: "https://github.com/viash-hub/biobox"
# Package-level settings for the "biobox" package this component belongs to.
# NOTE(review): indentation is missing in this view, so the nesting of the
# keys below cannot be confirmed from here; lines are left untouched.
package_config:
name: "biobox"
version: "v0.2"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
# Config modifications; their values match this component's `ps` command
# requirement, its native engine entry, and the docker engine's
# target_registry / target_tag.
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v0.2'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
# Package repository and issue tracker.
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

Some files were not shown because too many files have changed in this diff Show More