Build branch main with version main (320d044)

Build pipeline: viash-hub.biobox.main-4vvfj Source commit: 320d044fe4 Source message: Sortmerna (#146)
2024-09-09 07:10:14 +00:00
parent 3f20a97c9c
commit beb7940138
228 changed files with 9516 additions and 265 deletions
--- a/target/executable/sortmerna/.config.vsh.yaml
+++ b/target/executable/sortmerna/.config.vsh.yaml
@@ -0,0 +1,617 @@
+name: "sortmerna"
+version: "main"
+argument_groups:
+- name: "Input"
+  arguments:
+  - type: "boolean_true"
+    name: "--paired"
+    description: "Reads are paired-end. If a single reads file is provided, use this\
+      \ option \nto indicate the file contains interleaved paired reads when neither\n\
+      'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n"
+    info: null
+    direction: "input"
+  - type: "file"
+    name: "--input"
+    description: "Input fastq"
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--ref"
+    description: "Reference fasta file(s) for rRNA database."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--ribo_database_manifest"
+    description: "Text file containing paths to fasta files (one per line) that will\
+      \ be used to create the database for SortMeRNA."
+    info: null
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Output"
+  arguments:
+  - type: "file"
+    name: "--log"
+    description: "Sortmerna log file."
+    info: null
+    example:
+    - "$id.sortmerna.log"
+    must_exist: false
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--output"
+    alternatives:
+    - "--aligned"
+    description: "Directory and file prefix for aligned output. The appropriate extension:\
+      \ \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified,\
+      \ the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the\
+      \ prefix 'aligned' is used.\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--other"
+    description: "Create Non-aligned reads output file with this path/prefix. Must\
+      \ be used with fastx."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+- name: "Options"
+  arguments:
+  - type: "string"
+    name: "--kvdb"
+    description: "Path to directory of the key-value database file, used for storing\
+      \ the alignment results."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--idx_dir"
+    description: "Path to the directory for storing the reference index files."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--readb"
+    description: "Path to the directory for storing pre-processed reads."
+    info: null
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--fastx"
+    description: "Output aligned reads into FASTA/FASTQ file"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--sam"
+    description: "Output SAM alignment for aligned reads."
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--sq"
+    description: "Add SQ tags to the SAM file"
+    info: null
+    direction: "input"
+  - type: "string"
+    name: "--blast"
+    description: "Blast options:\n* '0'                    - pairwise\n* '1'     \
+      \               - tabular(Blast - m 8 format)\n* '1 cigar'              - tabular\
+      \ + column for CIGAR\n* '1 cigar qcov'         - tabular + columns for CIGAR\
+      \ and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR,\
+      \ query coverage and strand\n"
+    info: null
+    required: false
+    choices:
+    - "0"
+    - "1"
+    - "1 cigar"
+    - "1 cigar qcov"
+    - "1 cigar qcov qstrand"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--num_alignments"
+    description: "Report first INT alignments per read reaching E-value. If Int =\
+      \ 0, all alignments will be output. Default: '0'\n"
+    info: null
+    example:
+    - 0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--min_lis"
+    description: "search all alignments having the first INT longest LIS. LIS stands\
+      \ for Longest Increasing Subsequence, it is\ncomputed using seeds’ positions\
+      \ to expand hits into longer matches prior to Smith-Waterman alignment. Default:\
+      \ '2'.\n"
+    info: null
+    example:
+    - 2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--print_all_reads"
+    description: "output null alignment strings for non-aligned reads to SAM and/or\
+      \ BLAST tabular files."
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--paired_in"
+    description: "In the case where a pair of reads is aligned with a score above\
+      \ the threshold, the output of the reads is controlled\nby the following options:\n\
+      * --paired_in and --paired_out are both false: Only one read per pair is output\
+      \ to the aligned fasta file.\n* --paired_in is true and --paired_out is false:\
+      \ Both reads of the pair are output to the aligned fasta file.\n* --paired_in\
+      \ is false and --paired_out is true: Both reads are output to the other fasta\
+      \ file (if it is specified).\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--paired_out"
+    description: "See description of --paired_in."
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--out2"
+    description: "Output paired reads into separate files. Must be used with '--fastx'.\
+      \ If a single reads file is provided, this option\nimplies interleaved paired\
+      \ reads. When used with 'sout', four (4) output files for aligned reads will\
+      \ be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd,\
+      \ aligned-singleton-rev'. If 'other' option is also used,\neight (8) output\
+      \ files will be generated.\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--sout"
+    description: "Separate paired and singleton aligned reads. Must be used with '--fastx'.\
+      \ If a single reads file is provided,\nthis option implies interleaved paired\
+      \ reads. Cannot be used with '--paired_in' or '--paired_out'.\n"
+    info: null
+    direction: "input"
+  - type: "string"
+    name: "--zip_out"
+    description: "Compress the output files. The possible values are: \n* '1/true/t/yes/y'\n\
+      * '0/false/f/no/n'\n* '-1' (the same format as input - default)\nThe values are\
+      \ not case sensitive.\n"
+    info: null
+    example:
+    - "-1"
+    required: false
+    choices:
+    - "1"
+    - "true"
+    - "t"
+    - "yes"
+    - "y"
+    - "0"
+    - "false"
+    - "f"
+    - "no"
+    - "n"
+    - "-1"
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--match"
+    description: "Smith-Waterman score for a match (positive integer). Default: '2'.\n"
+    info: null
+    example:
+    - 2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--mismatch"
+    description: "Smith-Waterman penalty for a mismatch (negative integer). Default:\
+      \ '-3'.\n"
+    info: null
+    example:
+    - -3
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--gap_open"
+    description: "Smith-Waterman penalty for introducing a gap (positive integer).\
+      \ Default: '5'.\n"
+    info: null
+    example:
+    - 5
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--gap_ext"
+    description: "Smith-Waterman penalty for extending a gap (positive integer). Default:\
+      \ '2'.\n"
+    info: null
+    example:
+    - 2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--N"
+    description: "Smith-Waterman penalty for ambiguous letters (N’s) scored as --mismatch.\
+      \ Default: '-1'.\n"
+    info: null
+    example:
+    - -1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--a"
+    description: "Number of threads to use. Default: '1'.\n"
+    info: null
+    example:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--e"
+    description: "E-value threshold. Default: '1'.\n"
+    info: null
+    example:
+    - 1.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--F"
+    description: "Search only the forward strand."
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--R"
+    description: "Search only the reverse-complementary strand."
+    info: null
+    direction: "input"
+  - type: "integer"
+    name: "--num_alignment"
+    description: "Report first INT alignments per read reaching E-value (--num_alignments\
+      \ 0 signifies all alignments will be output).\nDefault: '-1'\n"
+    info: null
+    example:
+    - -1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--best"
+    description: "Report INT best alignments per read reaching E-value by searching\
+      \ --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments\
+      \ will be searched) Default: '1'.\n"
+    info: null
+    example:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--verbose"
+    alternatives:
+    - "-v"
+    description: "Verbose output."
+    info: null
+    direction: "input"
+- name: "OTU picking options"
+  arguments:
+  - type: "double"
+    name: "--id"
+    description: "%id similarity threshold (the alignment must still pass the E-value\
+      \ threshold). Default: '0.97'.\n"
+    info: null
+    example:
+    - 0.97
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "--coverage"
+    description: "%query coverage threshold (the alignment must still pass the E-value\
+      \ threshold). Default: '0.97'.\n"
+    info: null
+    example:
+    - 0.97
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--de_novo"
+    description: "FASTA/FASTQ file for reads matching database < %id (set using\
+      \ --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value\
+      \ threshold).\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--otu_map"
+    description: "Output OTU map (input to QIIME’s make_otu_table.py).\n"
+    info: null
+    direction: "input"
+- name: "Advanced options"
+  arguments:
+  - type: "integer"
+    name: "--num_seed"
+    description: "Number of seeds matched before searching for candidate LIS. Default:\
+      \ '2'.\n"
+    info: null
+    example:
+    - 2
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--passes"
+    description: "Three intervals at which to place the seed on the read. Default:\
+      \ 'L,L/2,3' (L is the seed length set in ./indexdb_rna).\n"
+    info: null
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "string"
+    name: "--edge"
+    description: "The number (or percentage if followed by %) of nucleotides to add\
+      \ to each edge of the alignment region on the\nreference sequence before performing\
+      \ Smith-Waterman alignment. Default: '4'.\n"
+    info: null
+    example:
+    - "4"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--full_search"
+    description: "Search for all 0-error and 1-error seed matches in the index\
+      \ rather than stopping after finding a 0-error match\n(<1% gain in sensitivity\
+      \ with up to four-fold decrease in speed).\n"
+    info: null
+    direction: "input"
+- name: "Indexing Options"
+  arguments:
+  - type: "integer"
+    name: "--index"
+    description: "Create index files for the reference database. By default when this\
+      \ option is not used, the program checks the\nreference index and builds it\
+      \ if not already existing.\nThis can be changed by using '-index' as follows:\n\
+      * '-index 0' - skip indexing. If the index does not exist, the program will\
+      \ terminate\n                        and warn to build the index prior to performing\
+      \ the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index\
+      \ 2' - the default behaviour, the same as when not using this option at all\n"
+    info: null
+    example:
+    - 2
+    required: false
+    choices:
+    - 0
+    - 1
+    - 2
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "double"
+    name: "-L"
+    description: "Indexing seed length. Default: '18'\n"
+    info: null
+    example:
+    - 18.0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--interval"
+    description: "Index every Nth L-mer in the reference database. Default: '1'\n"
+    info: null
+    example:
+    - 1
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--max_pos"
+    description: "Maximum number of positions to store for each unique L-mer. Set\
+      \ to 0 to store all positions. Default: '1000'\n"
+    info: null
+    example:
+    - 1000
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "bash_script"
+  path: "script.sh"
+  is_executable: true
+description: "Local sequence alignment tool for filtering, mapping and clustering.\
+  \ The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic\
+  \ data.\n"
+test_resources:
+- type: "bash_script"
+  path: "test.sh"
+  is_executable: true
+- type: "file"
+  path: "test_data"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+keywords:
+- "sort"
+- "mRNA"
+- "rRNA"
+- "alignment"
+- "filtering"
+- "mapping"
+- "clustering"
+license: "GPL-3.0"
+references:
+  doi:
+  - "10.1093/bioinformatics/bts611"
+links:
+  repository: "https://github.com/sortmerna/sortmerna"
+  homepage: "https://sortmerna.readthedocs.io/en/latest/"
+  documentation: "https://sortmerna.readthedocs.io/en/latest/manual4.0.html"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "ubuntu:22.04"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    # ca-certificates lets wget verify GitHub's TLS certificate, so --no-check-certificate is not needed.
+    run:
+    - "apt-get update && \\\napt-get install -y --no-install-recommends ca-certificates\
+      \ gzip cmake g++ wget && \\\napt-get clean && \\\nwget https://github.com/sortmerna/sortmerna/releases/download/v4.3.6/sortmerna-4.3.6-Linux.sh && \\\nbash sortmerna-4.3.6-Linux.sh --skip-license\n"
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/sortmerna/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/sortmerna"
+  executable: "target/executable/sortmerna/sortmerna"
+  viash_version: "0.9.0-RC7"
+  git_commit: "320d044fe45e565fbc9772640ebf6f39c5584b4a"
+  git_remote: "https://github.com/viash-hub/biobox"
+package_config:
+  name: "biobox"
+  version: "main"
+  description: "A collection of bioinformatics tools for working with sequence data.\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "modules"
+  - "sequencing"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/biobox"
+    issue_tracker: "https://github.com/viash-hub/biobox/issues"
--- a/target/executable/sortmerna/sortmerna
+++ b/target/executable/sortmerna/sortmerna