# biobox/target/executable/cutadapt/.config.vsh.yaml

# Component identity and authorship metadata.
name: cutadapt
version: main
authors:
- name: Toni Verbeiren
  roles:
  - author
  - maintainer
  info:
    links:
      github: tverbeiren
      linkedin: verbeiren
    organizations:
    - name: Data Intuitive
      href: "https://www.data-intuitive.com"
      role: Data Scientist and CEO
argument_groups:
# Inline adapter sequences (for paired data: of the first read).
# Each argument accepts several values separated by ";".
- name: "Specify Adapters for R1"
  arguments:
  # 3' adapter.
  - type: string
    name: "--adapter"
    alternatives:
    - "-a"
    description: |
      Sequence of an adapter ligated to the 3' end (paired data:
      of the first read). The adapter and subsequent bases are
      trimmed. If a '$' character is appended ('anchoring'), the
      adapter is only found if it is a suffix of the read.
    info: null
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
  # 5' adapter.
  - type: string
    name: "--front"
    alternatives:
    - "-g"
    description: |
      Sequence of an adapter ligated to the 5' end (paired data:
      of the first read). The adapter and any preceding bases
      are trimmed. Partial matches at the 5' end are allowed. If
      a '^' character is prepended ('anchoring'), the adapter is
      only found if it is a prefix of the read.
    info: null
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
  # Adapter that may sit at either end.
  - type: string
    name: "--anywhere"
    alternatives:
    - "-b"
    description: |
      Sequence of an adapter that may be ligated to the 5' or 3'
      end (paired data: of the first read). Both types of
      matches as described under -a and -g are allowed. If the
      first base of the read is part of the match, the behavior
      is as with -g, otherwise as with -a. This option is mostly
      for rescuing failed library preparations - do not use if
      you know which end your adapter was ligated to!
    info: null
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
# Adapter sequences supplied as FASTA files (for paired data: of the first read).
# NOTE(review): --adapter_fasta is declared `multiple: true` while the other
# FASTA arguments in this group are `multiple: false` - confirm against the
# wrapper script which of the two is intended.
- name: "Specify Adapters using Fasta files for R1"
  arguments:
  - type: file
    name: "--adapter_fasta"
    description: |
      Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
      of the first read). The adapter and subsequent bases are
      trimmed. If a '$' character is appended ('anchoring'), the
      adapter is only found if it is a suffix of the read.
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
  - type: file
    name: "--front_fasta"
    description: |
      Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
      of the first read). The adapter and any preceding bases
      are trimmed. Partial matches at the 5' end are allowed. If
      a '^' character is prepended ('anchoring'), the adapter is
      only found if it is a prefix of the read.
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: file
    name: "--anywhere_fasta"
    description: |
      Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
      end (paired data: of the first read). Both types of
      matches as described under -a and -g are allowed. If the
      first base of the read is part of the match, the behavior
      is as with -g, otherwise as with -a. This option is mostly
      for rescuing failed library preparations - do not use if
      you know which end your adapter was ligated to!
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
# Inline adapter sequences for the second read (R2) of paired-end data.
# Each argument accepts several values separated by ";".
# The descriptions refer to the second read and to the uppercase -A/-G/-B
# options, since this group configures R2 rather than R1.
- name: "Specify Adapters for R2"
  arguments:
  # 3' adapter on R2.
  - type: "string"
    name: "--adapter_r2"
    alternatives:
    - "-A"
    description: |
      Sequence of an adapter ligated to the 3' end of the second
      read (R2) in paired-end data. The adapter and subsequent bases are
      trimmed. If a '$' character is appended ('anchoring'), the
      adapter is only found if it is a suffix of the read.
    info: null
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  # 5' adapter on R2.
  - type: "string"
    name: "--front_r2"
    alternatives:
    - "-G"
    description: |
      Sequence of an adapter ligated to the 5' end of the second
      read (R2) in paired-end data. The adapter and any preceding bases
      are trimmed. Partial matches at the 5' end are allowed. If
      a '^' character is prepended ('anchoring'), the adapter is
      only found if it is a prefix of the read.
    info: null
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  # Adapter that may sit at either end of R2.
  - type: "string"
    name: "--anywhere_r2"
    alternatives:
    - "-B"
    description: |
      Sequence of an adapter that may be ligated to the 5' or 3'
      end of the second read (R2) in paired-end data. Both types of
      matches as described under -A and -G are allowed. If the
      first base of the read is part of the match, the behavior
      is as with -G, otherwise as with -A. This option is mostly
      for rescuing failed library preparations - do not use if
      you know which end your adapter was ligated to!
    info: null
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
# Adapter sequences for the second read (R2), supplied as FASTA files.
# The descriptions refer to the second read and to the uppercase -A/-G
# options, since this group configures R2 rather than R1.
- name: "Specify Adapters using Fasta files for R2"
  arguments:
  - type: "file"
    name: "--adapter_r2_fasta"
    description: |
      Fasta file containing sequences of an adapter ligated to the 3' end
      of the second read (R2) in paired-end data. The adapter and subsequent bases are
      trimmed. If a '$' character is appended ('anchoring'), the
      adapter is only found if it is a suffix of the read.
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--front_r2_fasta"
    description: |
      Fasta file containing sequences of an adapter ligated to the 5' end
      of the second read (R2) in paired-end data. The adapter and any preceding bases
      are trimmed. Partial matches at the 5' end are allowed. If
      a '^' character is prepended ('anchoring'), the adapter is
      only found if it is a prefix of the read.
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--anywhere_r2_fasta"
    description: |
      Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
      end of the second read (R2) in paired-end data. Both types of
      matches as described under -A and -G are allowed. If the
      first base of the read is part of the match, the behavior
      is as with -G, otherwise as with -A. This option is mostly
      for rescuing failed library preparations - do not use if
      you know which end your adapter was ligated to!
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
# Options that only matter when processing paired-end reads.
- name: "Paired-end options"
  arguments:
  - type: boolean_true
    name: "--pair_adapters"
    description: |
      Treat adapters given with -a/-A etc. as pairs. Either both
      or none are removed from each read pair.
    info: null
    direction: input
  # Restricted to the three listed choices.
  - type: string
    name: "--pair_filter"
    description: |
      Which of the reads in a paired-end read have to match the
      filtering criterion in order for the pair to be filtered.
    info: null
    required: false
    choices:
    - "any"
    - "both"
    - "first"
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--interleaved"
    description: |
      Read and/or write interleaved paired-end reads.
    info: null
    direction: input
# Input files and adapter-matching behaviour.
- name: "Input parameters"
  arguments:
  # The only required input of this group.
  - type: file
    name: "--input"
    description: |
      Input fastq file for single-end reads or R1 for paired-end reads.
    info: null
    must_exist: true
    create_parent: true
    required: true
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: file
    name: "--input_r2"
    description: |
      Input fastq file for R2 in the case of paired-end reads.
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: double
    name: "--error_rate"
    alternatives:
    - "-E"
    - "--errors"
    description: |
      Maximum allowed error rate (if 0 <= E < 1), or absolute
      number of errors for full-length adapter match (if E is an
      integer >= 1). Error rate = no. of errors divided by
      length of matching region. Default: 0.1 (10%).
    info: null
    example:
    - 0.1
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  # Declared as a boolean_false flag.
  - type: boolean_false
    name: "--no_indels"
    description: |
      Allow only mismatches in alignments.
    info: null
    direction: input
  - type: integer
    name: "--times"
    alternatives:
    - "-n"
    description: |
      Remove up to COUNT adapters from each read. Default: 1.
    info: null
    example:
    - 1
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: integer
    name: "--overlap"
    alternatives:
    - "-O"
    description: |
      Require MINLENGTH overlap between read and adapter for an
      adapter to be found. The default is 3.
    info: null
    example:
    - 3
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--match_read_wildcards"
    description: |
      Interpret IUPAC wildcards in reads.
    info: null
    direction: input
  # Declared as a boolean_false flag.
  - type: boolean_false
    name: "--no_match_adapter_wildcards"
    description: |
      Do not interpret IUPAC wildcards in adapters.
    info: null
    direction: input
  # Restricted to the five listed choices.
  - type: string
    name: "--action"
    description: |
      What to do if a match was found. trim: trim adapter and
      up- or downstream sequence; retain: trim, but retain
      adapter; mask: replace with 'N' characters; lowercase:
      convert to lowercase; none: leave unchanged.
      The default is trim.
    info: null
    example:
    - "trim"
    required: false
    choices:
    - "trim"
    - "retain"
    - "mask"
    - "lowercase"
    - "none"
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--revcomp"
    alternatives:
    - "--rc"
    description: |
      Check both the read and its reverse complement for adapter
      matches. If match is on reverse-complemented version,
      output that one.
    info: null
    direction: input
# Demultiplexing is selected through a single mode argument.
- name: "Demultiplexing options"
  arguments:
  # The description stays double-quoted: its value has a space before the
  # final newline, which a block scalar would make invisible.
  - type: string
    name: "--demultiplex_mode"
    description: "Enable demultiplexing and set the mode for it.\n\
      With mode 'unique_dual', adapters from the first and second read are used,\n\
      and the indexes from the reads are only used in pairs. This implies\n\
      --pair_adapters.\n\
      Enabling mode 'combinatorial_dual' allows all combinations of the sets of indexes\n\
      on R1 and R2. It is necessary to write each read pair to an output\n\
      file depending on the adapters found on both R1 and R2.\n\
      Mode 'single', uses indexes or barcodes located at the 5'\n\
      end of the R1 read (single). \n"
    info: null
    required: false
    choices:
    - "single"
    - "unique_dual"
    - "combinatorial_dual"
    direction: input
    multiple: false
    multiple_sep: ";"
# Modifications applied to the reads themselves (cutting, quality trimming,
# renaming).
- name: "Read modifications"
  arguments:
  # Accepts several values separated by ";".
  - type: integer
    name: "--cut"
    alternatives:
    - "-u"
    description: |
      Remove LEN bases from each read (or R1 if paired; use --cut_r2
      option for R2). If LEN is positive, remove bases from the
      beginning. If LEN is negative, remove bases from the end.
      Can be used twice if LENs have different signs. Applied
      *before* adapter trimming.
    info: null
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
  # Accepts several values separated by ";".
  - type: integer
    name: "--cut_r2"
    description: |
      Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the
      beginning. If LEN is negative, remove bases from the end.
      Can be used twice if LENs have different signs. Applied
      *before* adapter trimming.
    info: null
    required: false
    direction: input
    multiple: true
    multiple_sep: ";"
  - type: string
    name: "--nextseq_trim"
    description: |
      NextSeq-specific quality trimming (each read). Trims also
      dark cycles appearing as high-quality G bases.
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  # A string, because the value may hold two comma-separated cutoffs.
  - type: string
    name: "--quality_cutoff"
    alternatives:
    - "-q"
    description: |
      Trim low-quality bases from 5' and/or 3' ends of each read
      before adapter removal. Applied to both reads if data is
      paired. If one value is given, only the 3' end is trimmed.
      If two comma-separated cutoffs are given, the 5' end is
      trimmed with the first cutoff, the 3' end with the second.
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: string
    name: "--quality_cutoff_r2"
    alternatives:
    - "-Q"
    description: |
      Quality-trimming cutoff for R2. Default: same as for R1
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: integer
    name: "--quality_base"
    description: |
      Assume that quality values in FASTQ are encoded as
      ascii(quality + N). This needs to be set to 64 for some
      old Illumina FASTQ files. The default is 33.
    info: null
    example:
    - 33
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--poly_a"
    description: Trim poly-A tails
    info: null
    direction: input
  - type: integer
    name: "--length"
    alternatives:
    - "-l"
    description: |
      Shorten reads to LENGTH. Positive values remove bases at
      the end while negative ones remove bases at the beginning.
      This and the following modifications are applied after
      adapter trimming.
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--trim_n"
    description: "Trim N's on ends of reads."
    info: null
    direction: input
  - type: string
    name: "--length_tag"
    description: |
      Search for TAG followed by a decimal number in the
      description field of the read. Replace the decimal number
      with the correct length of the trimmed read. For example,
      use --length-tag 'length=' to correct fields like
      'length=123'.
    info: null
    example:
    - "length="
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  # NOTE(review): the description says "can be given multiple times" but the
  # argument is declared `multiple: false` - confirm which one is intended.
  - type: string
    name: "--strip_suffix"
    description: |
      Remove this suffix from read names if present. Can be
      given multiple times.
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: string
    name: "--prefix"
    alternatives:
    - "-x"
    description: |
      Add this prefix to read names. Use {name} to insert the
      name of the matching adapter.
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: string
    name: "--suffix"
    alternatives:
    - "-y"
    description: |
      Add this suffix to read names; can also include {name}
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: string
    name: "--rename"
    description: |
      Rename reads using TEMPLATE containing variables such as
      {id}, {adapter_name} etc. (see documentation)
    info: null
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  - type: boolean_true
    name: "--zero_cap"
    alternatives:
    - "-z"
    description: Change negative quality values to zero.
    info: null
    direction: input
# Read filters. Length and N-count limits are strings because their values
# are not always plain integers (colon-separated R1:R2 lengths, fractions).
- name: "Filtering of processed reads"
  description: |
    Filters are applied after above read modifications. Paired-end reads are
    always discarded pairwise (see also --pair_filter).
  arguments:
  - type: "string"
    name: "--minimum_length"
    alternatives:
    - "-m"
    description: |
      Discard reads shorter than LEN. Default is 0.
      When trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).
      If the colon syntax is not used, the same minimum length applies to both reads, as discussed above.
      Also, one of the values can be omitted to impose no restrictions.
      For example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.
    info: null
    example:
    - "0"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--maximum_length"
    alternatives:
    - "-M"
    description: |
      Discard reads longer than LEN. Default: no limit.
      For paired reads, see the remark for --minimum_length
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--max_n"
    description: |
      Discard reads with more than COUNT 'N' bases. If COUNT is
      a number between 0 and 1, it is interpreted as a fraction
      of the read length.
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Typed as double: the expected number of errors is computed from quality
  # values and is generally fractional, so an integer-only type would reject
  # valid thresholds such as 0.5.
  - type: "double"
    name: "--max_expected_errors"
    alternatives:
    - "--max_ee"
    description: |
      Discard reads whose expected number of errors (computed
      from quality values) exceeds ERRORS.
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  # Typed as double: this is an error count divided by read length, i.e. a
  # rate, which an integer-only type cannot express.
  - type: "double"
    name: "--max_average_error_rate"
    alternatives:
    - "--max_aer"
    description: |
      as --max_expected_errors (see above), but divided by
      length to account for reads of varying length.
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--discard_trimmed"
    alternatives:
    - "--discard"
    description: |
      Discard reads that contain an adapter. Use also -O to
      avoid discarding too many randomly matching reads.
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--discard_untrimmed"
    alternatives:
    - "--trimmed_only"
    description: |
      Discard reads that do not contain an adapter.
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--discard_casava"
    description: |
      Discard reads that did not pass CASAVA filtering (header
      has :Y:).
    info: null
    direction: "input"
# Reporting and output-file arguments.
- name: "Output parameters"
  arguments:
  - type: string
    name: "--report"
    description: |
      Which type of report to print: 'full' (default) or 'minimal'.
    info: null
    example:
    - "full"
    required: false
    choices:
    - "full"
    - "minimal"
    direction: input
    multiple: false
    multiple_sep: ";"
  # NOTE(review): the description mentions "this file", but the argument is a
  # boolean flag and takes no path - confirm where the wrapper script writes
  # the JSON report.
  - type: boolean_true
    name: "--json"
    description: |
      Write report in JSON format to this file.
    info: null
    direction: input
  # The only argument in this file with `direction: output`; it is required
  # and accepts several values separated by ";".
  - type: file
    name: "--output"
    description: |
      Glob pattern for matching the expected output files.
      Should include `$output_dir`.
    info: null
    example:
    - "fastq/*_001.fast[a,q]"
    must_exist: true
    create_parent: true
    required: true
    direction: output
    multiple: true
    multiple_sep: ";"
  - type: boolean_true
    name: "--fasta"
    description: |
      Output FASTA to standard output even on FASTQ input.
    info: null
    direction: input
  - type: boolean_true
    name: "--info_file"
    description: |
      Write information about each read and its adapter matches
      into info.txt in the output directory.
      See the documentation for the file format.
    info: null
    direction: input
# Single flag that switches on debug output.
- name: Debug
  arguments:
  - type: boolean_true
    name: "--debug"
    description: Print debug information
    info: null
    direction: input
# Component script, test script and descriptive metadata.
resources:
- type: bash_script
  path: script.sh
  is_executable: true
description: |
  Cutadapt removes adapter sequences from high-throughput sequencing reads.
test_resources:
- type: bash_script
  path: test.sh
  is_executable: true
info: null
status: enabled
# Commands listed as requirements of the component.
requirements:
  commands:
  - ps
keywords:
- RNA-seq
- scRNA-seq
- high-throughput
license: MIT
references:
  doi:
  - "10.14806/ej.17.1.200"
links:
  repository: "https://github.com/marcelm/cutadapt"
  homepage: "https://cutadapt.readthedocs.io"
  documentation: "https://cutadapt.readthedocs.io"
# Runner definitions: an "executable" runner and a "nextflow" runner.
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Named resource labels. Each label maps to a single `memory = <bytes>.B`
    # or `cpus = <n>` setting string.
    labels:
      # Decimal memory sizes (1 gb = 10^9 bytes, 1 tb = 10^12 bytes).
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      # Binary memory sizes (1 gib = 2^30 bytes, 1 tib = 2^40 bytes).
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      # CPU-count labels.
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
# Docker engine: python:3.12 base image with cutadapt installed through pip.
- type: docker
  id: docker
  image: "python:3.12"
  target_registry: images.viash-hub.com
  target_tag: main
  namespace_separator: "/"
  setup:
  - type: python
    user: false
    pip:
    - cutadapt
    upgrade: true
  # Writes the installed cutadapt version to /var/software_versions.txt
  # in the form `cutadapt: "<version>"`.
  - type: docker
    run:
    - |
      cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions.txt
  entrypoint: []
  cmd: null
# Native engine.
- type: native
  id: native
# Provenance of this generated config: source config, runner/engine, output
# paths and the git state it was built from.
build_info:
  config: "src/cutadapt/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/cutadapt"
  executable: "target/executable/cutadapt/cutadapt"
  viash_version: "0.9.0"
  git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
  # Security: the remote is recorded without credentials. Access tokens must
  # never be embedded in this URL; any token exposed in an earlier revision of
  # this file should be treated as compromised and revoked.
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-3-g7f8bcc2"
# Package-level settings of the biobox collection this component belongs to.
package_config:
  name: biobox
  version: main
  description: |
    A collection of bioinformatics tools for working with sequence data.
  info: null
  viash_version: "0.9.0"
  source: src
  target: target
  # These expressions correspond to values visible elsewhere in this config:
  # the `ps` command requirement, the native engine, and the Docker target
  # registry and tag.
  config_mods:
  - |
    .requirements.commands := ['ps']
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - bioinformatics
  - modules
  - sequencing
  license: MIT
  organization: vsh
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"