# biobox/target/executable/umi_tools/umi_tools_extract/.config.vsh.yaml

# Component identity.
namespace: "umi_tools"
name: "umi_tools_extract"
version: "main"
argument_groups:
# Input group: read files plus the barcode patterns applied to them.
- name: "Input"
  arguments:
  # Required input file; must exist when the component is invoked.
  - type: "file"
    name: "--input"
    description: "File containing the input data."
    info: null
    example:
    - "sample.fastq"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Optional R2 input file for paired data.
  # NOTE(review): the description below still contains the unfilled template
  # placeholder "<list of other required arguments>" - replace it with the
  # actual argument names once confirmed against the wrapper script.
  - type: "file"
    name: "--read2_in"
    description: "File containing the input data for the R2 reads (if paired). If\
      \ provided, a <list of other required arguments> need to be provided."
    info: null
    example:
    - "sample_R2.fastq"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # UMI barcode pattern; also available under the short alias "-p".
  - type: "string"
    name: "--bc_pattern"
    alternatives:
    - "-p"
    description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\
      \ first 6 nucleotides \nof the read are from the UMI.\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # UMI barcode pattern for read 2.
  - type: "string"
    name: "--bc_pattern2"
    description: "The UMI barcode pattern to use for read 2."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Output group: every file in this group is written by the tool, so each one
# is declared with direction "output".
- name: "Output"
  arguments:
  # Required output file for read 1.
  - type: "file"
    name: "--output"
    description: "Output file for read 1."
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # Optional output file for read 2.
  - type: "file"
    name: "--read2_out"
    description: "Output file for read 2."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # Rejected reads are written here; this is an output, not an input (it was
  # previously declared as an input and therefore checked for existence).
  - type: "file"
    name: "--filtered_out"
    description: "Write out reads not matching regex pattern or cell barcode whitelist\
      \ to this file.\n"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # Rejected read pairs are written here; likewise an output file.
  - type: "file"
    name: "--filtered_out2"
    description: "Write out read pairs not matching regex pattern or cell barcode\
      \ whitelist to this file.\n"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
# Extract Options group: controls how barcodes are extracted and filtered.
- name: "Extract Options"
  arguments:
  # Extraction method selector; restricted to the two choices listed.
  - type: "string"
    name: "--extract_method"
    description: "Method to use to extract the UMI barcodes (`string` or `regex`).\
      \ Default: `string`.\n"
    info: null
    example:
    - "string"
    required: false
    choices:
    - "string"
    - "regex"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--error_correct_cell"
    description: "Error correct cell barcodes to the whitelist."
    info: null
    direction: "input"
  # Tab-separated whitelist file; column 2 is only used together with
  # --error_correct_cell (see description).
  - type: "file"
    name: "--whitelist"
    description: "Whitelist of accepted cell barcodes tab-separated format, where\
      \ column 1 is the whitelisted\ncell barcodes and column 2 is the list (comma-separated)\
      \ of other cell barcodes which should \nbe corrected to the barcode in column\
      \ 1. If the --error_correct_cell option is not used, this\ncolumn will be ignored.\n"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--blacklist"
    description: "Blacklist of cell barcodes to discard."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--subset_reads"
    description: "Only parse the first N reads."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--quality_filter_threshold"
    description: "Remove reads where any UMI base quality score falls below this threshold."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # A quality threshold, so typed as an integer like --quality_filter_threshold.
  - type: "integer"
    name: "--quality_filter_mask"
    description: "If a UMI base has a quality below this threshold, replace the base\
      \ with 'N'.\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--quality_encoding"
    description: "Quality score encoding. Choose from:\n  * phred33 [33-77]\n  * phred64\
      \ [64-106]\n  * solexa [59-106]\n"
    info: null
    required: false
    choices:
    - "phred33"
    - "phred64"
    - "solexa"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--reconcile_pairs"
    description: "Allow read 2 infile to contain reads not in read 1 infile. This\
      \ enables support for upstream protocols\nwhere read one contains cell barcodes,\
      \ and the read pairs have been filtered and corrected without regard\nto the\
      \ read2.\n"
    info: null
    direction: "input"
  # Also accepted under the alias "--3prime".
  - type: "boolean_true"
    name: "--three_prime"
    alternatives:
    - "--3prime"
    description: "By default the barcode is assumed to be on the 5' end of the read,\
      \ but use this option to specify that it is\non the 3' end instead. This option\
      \ only works with --extract_method=string since 3' encoding can be specified\n\
      explicitly with a regex, e.g `.*(?P<umi_1>.{5})$`.\n"
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--ignore_read_pair_suffixes"
    description: "Ignore \"/1\" and \"/2\" read name suffixes. Note that this option\
      \ is required if the suffixes are not whitespace\nseparated from the rest of\
      \ the read name.\n"
    info: null
    direction: "input"
  - type: "string"
    name: "--umi_separator"
    description: "The character that separates the UMI in the read name. Most likely\
      \ a colon if you skipped the extraction with\nUMI-tools and used other software.\
      \ Default: `_`\n"
    info: null
    example:
    - "_"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--grouping_method"
    description: "Method to use to determine read groups by subsuming those with similar\
      \ UMIs. All methods start by identifying\nthe reads with the same mapping position,\
      \ but treat similar yet nonidentical UMIs differently. Default: `directional`\n"
    info: null
    example:
    - "directional"
    required: false
    choices:
    - "unique"
    - "percentile"
    - "cluster"
    - "adjacency"
    - "directional"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Restricted to 0, 1 or 2 via the choices list.
  - type: "integer"
    name: "--umi_discard_read"
    description: "After UMI barcode extraction discard either R1 or R2 by setting\
      \ this parameter to 1 or 2, respectively. Default: `0`\n"
    info: null
    example:
    - 0
    required: false
    choices:
    - 0
    - 1
    - 2
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Common Options group: logging, timing, temp directory, compression and seed.
- name: "Common Options"
  arguments:
  # Log file written by the tool.
  - type: "file"
    name: "--log"
    description: "File with logging information."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # A flag the caller sets, so it is an input like every other boolean flag
  # in this config (it was previously declared as an output).
  - type: "boolean_true"
    name: "--log2stderr"
    description: "Send logging information to stderr."
    info: null
    direction: "input"
  - type: "integer"
    name: "--verbose"
    description: "Log level. The higher, the more output."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Error file written by the tool.
  - type: "file"
    name: "--error"
    description: "File with error information."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--temp_dir"
    description: "Directory for temporary files. If not set, the bash environmental\
      \ variable TMPDIR is used.\n"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--compresslevel"
    description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\
      \ than python gzip default (which is 9).\nDefault `6`.\n"
    info: null
    example:
    - 6
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Timing file written by the tool.
  - type: "file"
    name: "--timeit"
    description: "Store timing information in file."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # The only argument in this group with an explicit default ("all").
  - type: "string"
    name: "--timeit_name"
    description: "Name in timing file for this class of jobs."
    info: null
    default:
    - "all"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--timeit_header"
    description: "Add header for timing information."
    info: null
    direction: "input"
  - type: "integer"
    name: "--random_seed"
    description: "Random seed to initialize number generator with."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Main resource: the executable bash script that implements the component.
resources:
- type: "bash_script"
  path: "script.sh"
  is_executable: true
# Component-level description.
description: "Flexible removal of UMI sequences from fastq reads.\nUMIs are removed\
  \ and appended to the read name. Any other barcode, for example a library barcode,\n\
  is left on the read. Can also filter reads by quality or against a whitelist.\n"
# Test resources: an executable test script plus the "test_data" path it ships with.
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
- type: "file"
  path: "test_data"
# Component status and licensing.
status: "enabled"
license: "MIT"
info: null
# Commands that must be available at runtime.
requirements:
  commands:
  - "ps"
# Search keywords.
keywords:
- "extract"
- "umi-tools"
- "umi"
- "fastq"
# Upstream project links and the publication DOI.
links:
  homepage: "https://umi-tools.readthedocs.io/en/latest/"
  documentation: "https://umi-tools.readthedocs.io/en/latest/reference/extract.html"
  repository: "https://github.com/CGATOxford/UMI-tools"
references:
  doi:
  - "10.1101/gr.209601.116"
# Two runners are configured: a plain executable and a Nextflow module.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> resource directive string. Memory labels are given both as
    # decimal byte counts (gb/tb) and binary byte counts (gib/tib); cpu labels
    # set `cpus`.
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
# Execution engines: a Docker engine and a native engine.
engines:
# Docker engine built on the biocontainers umi_tools 1.1.4 image; the setup
# step writes the tool version to /var/software_versions.txt.
- type: "docker"
  id: "docker"
  image: "quay.io/biocontainers/umi_tools:1.1.4--py310h4b81fae_2"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "docker"
    run:
    - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
# Native engine with no additional settings.
- type: "native"
  id: "native"
# Provenance of this built artifact: tool version, source config, build
# targets, and the git revision it was built from.
build_info:
  viash_version: "0.9.0-RC6"
  config: "src/umi_tools/umi_tools_extract/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/umi_tools/umi_tools_extract"
  executable: "target/executable/umi_tools/umi_tools_extract/umi_tools_extract"
  git_remote: "https://github.com/viash-hub/biobox"
  git_commit: "766ab6c9c3059004c7c3f205621909b2d8b0b26d"
# Package-level settings of the "biobox" package.
package_config:
  name: "biobox"
  version: "main"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.0-RC6"
  source: "src"
  target: "target"
  # Config mods declared at package level (DSL strings, kept verbatim). They
  # name the same requirements.commands, native engine, target_registry and
  # target_tag values that appear in this component's config.
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"