Build branch main with version main (320d044)

Build pipeline: viash-hub.biobox.main-4vvfj

Source commit: 320d044fe4

Source message: Sortmerna (#146)
This commit is contained in:
CI
2024-09-09 07:10:14 +00:00
parent 3f20a97c9c
commit beb7940138
228 changed files with 9516 additions and 265 deletions

View File

@@ -0,0 +1,617 @@
name: "sortmerna"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean_true"
name: "--paired"
description: "Reads are paired-end. If a single reads file is provided, use this\
\ option \nto indicate the file contains interleaved paired reads when neither\n\
'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n"
info: null
direction: "input"
- type: "file"
name: "--input"
description: "Input fastq"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--ref"
description: "Reference fasta file(s) for rRNA database."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--ribo_database_manifest"
description: "Text file containing paths to fasta files (one per line) that will\
\ be used to create the database for SortMeRNA."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--log"
description: "Sortmerna log file."
info: null
example:
- "$id.sortmerna.log"
must_exist: false
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output"
alternatives:
- "--aligned"
description: "Directory and file prefix for aligned output. The appropriate extension:\
\ \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified,\
\ the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the\
\ prefix 'aligned' is used.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--other"
description: "Create Non-aligned reads output file with this path/prefix. Must\
\ be used with fastx."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "string"
name: "--kvdb"
description: "Path to directory of the key-value database file, used for storing\
\ the alignment results."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--idx_dir"
description: "Path to the directory for storing the reference index files."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--readb"
description: "Path to the directory for storing pre-processed reads."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--fastx"
description: "Output aligned reads into FASTA/FASTQ file"
info: null
direction: "input"
- type: "boolean_true"
name: "--sam"
description: "Output SAM alignment for aligned reads."
info: null
direction: "input"
- type: "boolean_true"
name: "--sq"
description: "Add SQ tags to the SAM file"
info: null
direction: "input"
- type: "string"
name: "--blast"
description: "Blast options:\n* '0' - pairwise\n* '1' \
\ - tabular(Blast - m 8 format)\n* '1 cigar' - tabular\
\ + column for CIGAR\n* '1 cigar qcov' - tabular + columns for CIGAR\
\ and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR,\
\ query coverage and strand\n"
info: null
required: false
choices:
- "0"
- "1"
- "1 cigar"
- "1 cigar qcov"
- "1 cigar qcov qstrand"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_alignments"
description: "Report first INT alignments per read reaching E-value. If Int =\
\ 0, all alignments will be output. Default: '0'\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_lis"
description: "search all alignments having the first INT longest LIS. LIS stands\
\ for Longest Increasing Subsequence, it is\ncomputed using seeds positions\
\ to expand hits into longer matches prior to Smith-Waterman alignment. Default:\
\ '2'.\n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--print_all_reads"
description: "output null alignment strings for non-aligned reads to SAM and/or\
\ BLAST tabular files."
info: null
direction: "input"
- type: "boolean_true"
name: "--paired_in"
description: "In the case where a pair of reads is aligned with a score above\
\ the threshold, the output of the reads is controlled\nby the following options:\n\
* --paired_in and --paired_out are both false: Only one read per pair is output\
\ to the aligned fasta file.\n* --paired_in is true and --paired_out is false:\
\ Both reads of the pair are output to the aligned fasta file.\n* --paired_in\
\ is false and --paired_out is true: Both reads are output to the other fasta\
\ file (if it is specified).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--paired_out"
description: "See description of --paired_in."
info: null
direction: "input"
- type: "boolean_true"
name: "--out2"
description: "Output paired reads into separate files. Must be used with '--fastx'.\
\ If a single reads file is provided, this option\nimplies interleaved paired\
\ reads. When used with 'sout', four (4) output files for aligned reads will\
\ be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd,\
\ aligned-singleton-rev'. If 'other' option is also used,\neight (8) output\
\ files will be generated.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--sout"
description: "Separate paired and singleton aligned reads. Must be used with '--fastx'.\
\ If a single reads file is provided,\nthis option implies interleaved paired\
\ reads. Cannot be used with '--paired_in' or '--paired_out'.\n"
info: null
direction: "input"
- type: "string"
name: "--zip_out"
description: "Compress the output files. The possible values are: \n* '1/true/t/yes/y'\n\
* '0/false/f/no/n'\n* '-1' (the same format as input - default)\nThe values are\
\ not case sensitive.\n"
info: null
example:
- "-1"
required: false
choices:
- "1"
- "true"
- "t"
- "yes"
- "y"
- "0"
- "false"
- "f"
- "no"
- "n"
- "-1"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--match"
description: "Smith-Waterman score for a match (positive integer). Default: '2'.\n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--mismatch"
description: "Smith-Waterman penalty for a mismatch (negative integer). Default:\
\ '-3'.\n"
info: null
example:
- -3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gap_open"
description: "Smith-Waterman penalty for introducing a gap (positive integer).\
\ Default: '5'.\n"
info: null
example:
- 5
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gap_ext"
description: "Smith-Waterman penalty for extending a gap (positive integer). Default:\
\ '2'.\n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--N"
description: "Smith-Waterman penalty for ambiguous letters (Ns) scored as --mismatch.\
\ Default: '-1'.\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--a"
description: "Number of threads to use. Default: '1'.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--e"
description: "E-value threshold. Default: '1'.\n"
info: null
example:
- 1.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--F"
description: "Search only the forward strand."
info: null
direction: "input"
- type: "boolean_true"
name: "--R"
description: "Search only the reverse-complementary strand."
info: null
direction: "input"
- type: "integer"
name: "--num_alignment"
description: "Report first INT alignments per read reaching E-value (--num_alignments\
\ 0 signifies all alignments will be output).\nDefault: '-1'\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--best"
description: "Report INT best alignments per read reaching E-value by searching\
\ --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments\
\ will be searched) Default: '1'.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--verbose"
alternatives:
- "-v"
description: "Verbose output."
info: null
direction: "input"
- name: "OTU picking options"
arguments:
- type: "double"
name: "--id"
description: "%id similarity threshold (the alignment must still pass the E-value\
\ threshold). Default: '0.97'.\n"
info: null
example:
- 0.97
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--coverage"
description: "%query coverage threshold (the alignment must still pass the E-value\
\ threshold). Default: '0.97'.\n"
info: null
example:
- 0.97
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--de_novo"
description: "FASTA/FASTQ file for reads matching database < %id off (set using\
\ --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value\
\ threshold).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--otu_map"
description: "Output OTU map (input to QIIME's make_otu_table.py).\n"
info: null
direction: "input"
- name: "Advanced options"
arguments:
- type: "integer"
name: "--num_seed"
description: "Number of seeds matched before searching for candidate LIS. Default:\
\ '2'.\n"
info: null
example:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--passes"
description: "Three intervals at which to place the seed on the read L,L/2,3 (L\
\ is the seed length set in ./indexdb_rna).\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--edge"
description: "The number (or percentage if followed by %) of nucleotides to add\
\ to each edge of the alignment region on the\nreference sequence before performing\
\ Smith-Waterman alignment. Default: '4'.\n"
info: null
example:
- "4"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--full_search"
description: "Search for all 0-error and 1-error seed off matches in the index\
\ rather than stopping after finding a 0-error match\n(<1% gain in sensitivity\
\ with up to four-fold decrease in speed).\n"
info: null
direction: "input"
- name: "Indexing Options"
arguments:
- type: "integer"
name: "--index"
description: "Create index files for the reference database. By default when this\
\ option is not used, the program checks the\nreference index and builds it\
\ if not already existing.\nThis can be changed by using '-index' as follows:\n\
* '-index 0' - skip indexing. If the index does not exist, the program will\
\ terminate\n and warn to build the index prior to performing\
\ the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index\
\ 2' - the default behaviour, the same as when not using this option at all\n"
info: null
example:
- 2
required: false
choices:
- 0
- 1
- 2
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "-L"
description: "Indexing seed length. Default: '18'\n"
info: null
example:
- 18.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--interval"
description: "Index every Nth L-mer in the reference database. Default: '1'\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_pos"
description: "Maximum number of positions to store for each unique L-mer. Set\
\ to 0 to store all positions. Default: '1000'\n"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Local sequence alignment tool for filtering, mapping and clustering.\
\ The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic\
\ data.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "sort"
- "mRNA"
- "rRNA"
- "alignment"
- "filtering"
- "mapping"
- "clustering"
license: "GPL-3.0"
references:
doi:
- "10.1093/bioinformatics/bts611"
links:
repository: "https://github.com/sortmerna/sortmerna"
homepage: "https://sortmerna.readthedocs.io/en/latest/"
documentation: "https://sortmerna.readthedocs.io/en/latest/manual4.0.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt-get update && \\\napt-get install -y --no-install-recommends gzip cmake\
\ g++ wget && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/sortmerna/sortmerna/releases/download/v4.3.6/sortmerna-4.3.6-Linux.sh\
\ && \\\nbash sortmerna-4.3.6-Linux.sh --skip-license\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/sortmerna/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/sortmerna"
executable: "target/executable/sortmerna/sortmerna"
viash_version: "0.9.0-RC7"
git_commit: "320d044fe45e565fbc9772640ebf6f39c5584b4a"
git_remote: "https://github.com/viash-hub/biobox"
package_config:
name: "biobox"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff