---
# Viash component configuration for the `cutadapt` component.
#
# NOTE(review): the `build_info` section at the bottom indicates this file was
# built from src/cutadapt/config.vsh.yaml; presumably any change made here
# should also be applied to that source config - confirm.
#
# Layout:
#   * component metadata (name, version, authors)
#   * argument_groups: the command-line interface exposed by the component
#   * resources / test_resources: the scripts that implement and test it
#   * runners / engines: how the component is executed and containerised
#   * build_info / package_config: build provenance and package settings
name: "cutadapt"
version: "main"
authors:
  - name: "Toni Verbeiren"
    roles:
      - "author"
      - "maintainer"
    info:
      links:
        github: "tverbeiren"
        linkedin: "verbeiren"
      organizations:
        - name: "Data Intuitive"
          href: "https://www.data-intuitive.com"
          role: "Data Scientist and CEO"

argument_groups:
  # Adapter sequences given inline for R1.
  - name: "Specify Adapters for R1"
    arguments:
      - type: "string"
        name: "--adapter"
        alternatives:
          - "-a"
        description: |
          Sequence of an adapter ligated to the 3' end (paired data:
          of the first read). The adapter and subsequent bases are
          trimmed. If a '$' character is appended ('anchoring'), the
          adapter is only found if it is a suffix of the read.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "string"
        name: "--front"
        alternatives:
          - "-g"
        description: |
          Sequence of an adapter ligated to the 5' end (paired data:
          of the first read). The adapter and any preceding bases
          are trimmed. Partial matches at the 5' end are allowed. If
          a '^' character is prepended ('anchoring'), the adapter is
          only found if it is a prefix of the read.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "string"
        name: "--anywhere"
        alternatives:
          - "-b"
        description: |
          Sequence of an adapter that may be ligated to the 5' or 3'
          end (paired data: of the first read). Both types of
          matches as described under -a and -g are allowed. If the
          first base of the read is part of the match, the behavior
          is as with -g, otherwise as with -a. This option is mostly
          for rescuing failed library preparations - do not use if
          you know which end your adapter was ligated to!
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"

  # Adapter sequences read from FASTA files for R1.
  # NOTE(review): --adapter_fasta is the only *_fasta argument in this file
  # with `multiple: true`; all the others are `multiple: false` - confirm that
  # this asymmetry is intended.
  - name: "Specify Adapters using Fasta files for R1"
    arguments:
      - type: "file"
        name: "--adapter_fasta"
        description: |
          Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
          of the first read). The adapter and subsequent bases are
          trimmed. If a '$' character is appended ('anchoring'), the
          adapter is only found if it is a suffix of the read.
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "file"
        name: "--front_fasta"
        description: |
          Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
          of the first read). The adapter and any preceding bases
          are trimmed. Partial matches at the 5' end are allowed. If
          a '^' character is prepended ('anchoring'), the adapter is
          only found if it is a prefix of the read.
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "file"
        name: "--anywhere_fasta"
        description: |
          Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
          end (paired data: of the first read). Both types of
          matches as described under -a and -g are allowed. If the
          first base of the read is part of the match, the behavior
          is as with -g, otherwise as with -a. This option is mostly
          for rescuing failed library preparations - do not use if
          you know which end your adapter was ligated to!
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"

  # Adapter sequences given inline for R2. The descriptions refer to the
  # second read of a pair (they previously repeated the R1 wording).
  - name: "Specify Adapters for R2"
    arguments:
      - type: "string"
        name: "--adapter_r2"
        alternatives:
          - "-A"
        description: |
          Sequence of an adapter ligated to the 3' end (paired data:
          of the second read). The adapter and subsequent bases are
          trimmed. If a '$' character is appended ('anchoring'), the
          adapter is only found if it is a suffix of the read.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "string"
        name: "--front_r2"
        alternatives:
          - "-G"
        description: |
          Sequence of an adapter ligated to the 5' end (paired data:
          of the second read). The adapter and any preceding bases
          are trimmed. Partial matches at the 5' end are allowed. If
          a '^' character is prepended ('anchoring'), the adapter is
          only found if it is a prefix of the read.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "string"
        name: "--anywhere_r2"
        alternatives:
          - "-B"
        description: |
          Sequence of an adapter that may be ligated to the 5' or 3'
          end (paired data: of the second read). Both types of
          matches as described under -A and -G are allowed. If the
          first base of the read is part of the match, the behavior
          is as with -G, otherwise as with -A. This option is mostly
          for rescuing failed library preparations - do not use if
          you know which end your adapter was ligated to!
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"

  # Adapter sequences read from FASTA files for R2.
  - name: "Specify Adapters using Fasta files for R2"
    arguments:
      - type: "file"
        name: "--adapter_r2_fasta"
        description: |
          Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
          of the second read). The adapter and subsequent bases are
          trimmed. If a '$' character is appended ('anchoring'), the
          adapter is only found if it is a suffix of the read.
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "file"
        name: "--front_r2_fasta"
        description: |
          Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
          of the second read). The adapter and any preceding bases
          are trimmed. Partial matches at the 5' end are allowed. If
          a '^' character is prepended ('anchoring'), the adapter is
          only found if it is a prefix of the read.
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "file"
        name: "--anywhere_r2_fasta"
        description: |
          Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
          end (paired data: of the second read). Both types of
          matches as described under -A and -G are allowed. If the
          first base of the read is part of the match, the behavior
          is as with -G, otherwise as with -A. This option is mostly
          for rescuing failed library preparations - do not use if
          you know which end your adapter was ligated to!
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"

  - name: "Paired-end options"
    arguments:
      - type: "boolean_true"
        name: "--pair_adapters"
        description: |
          Treat adapters given with -a/-A etc. as pairs. Either both
          or none are removed from each read pair.
        info: null
        direction: "input"
      - type: "string"
        name: "--pair_filter"
        description: |
          Which of the reads in a paired-end read have to match the
          filtering criterion in order for the pair to be filtered.
        info: null
        required: false
        choices:
          - "any"
          - "both"
          - "first"
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--interleaved"
        description: "Read and/or write interleaved paired-end reads.\n"
        info: null
        direction: "input"

  # Input files plus the options that control adapter matching.
  - name: "Input parameters"
    arguments:
      - type: "file"
        name: "--input"
        description: "Input fastq file for single-end reads or R1 for paired-end reads.\n"
        info: null
        must_exist: true
        create_parent: true
        required: true
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "file"
        name: "--input_r2"
        description: "Input fastq file for R2 in the case of paired-end reads.\n"
        info: null
        must_exist: true
        create_parent: true
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "double"
        name: "--error_rate"
        alternatives:
          - "-E"
          - "--errors"
        description: |
          Maximum allowed error rate (if 0 <= E < 1), or absolute
          number of errors for full-length adapter match (if E is an
          integer >= 1). Error rate = no. of errors divided by
          length of matching region. Default: 0.1 (10%).
        info: null
        example:
          - 0.1
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--no_indels"
        description: "Allow only mismatches in alignments.\n"
        info: null
        direction: "input"
      - type: "integer"
        name: "--times"
        alternatives:
          - "-n"
        description: "Remove up to COUNT adapters from each read. Default: 1.\n"
        info: null
        example:
          - 1
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "integer"
        name: "--overlap"
        alternatives:
          - "-O"
        description: |
          Require MINLENGTH overlap between read and adapter for an
          adapter to be found. The default is 3.
        info: null
        example:
          - 3
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--match_read_wildcards"
        description: "Interpret IUPAC wildcards in reads.\n"
        info: null
        direction: "input"
      - type: "boolean_true"
        name: "--no_match_adapter_wildcards"
        description: "Do not interpret IUPAC wildcards in adapters.\n"
        info: null
        direction: "input"
      - type: "string"
        name: "--action"
        description: |
          What to do if a match was found. trim: trim adapter and
          up- or downstream sequence; retain: trim, but retain
          adapter; mask: replace with 'N' characters; lowercase:
          convert to lowercase; none: leave unchanged.
          The default is trim.
        info: null
        example:
          - "trim"
        required: false
        choices:
          - "trim"
          - "retain"
          - "mask"
          - "lowercase"
          - "none"
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--revcomp"
        alternatives:
          - "--rc"
        description: |
          Check both the read and its reverse complement for adapter
          matches. If match is on reverse-complemented version,
          output that one.
        info: null
        direction: "input"

  - name: "Demultiplexing options"
    arguments:
      - type: "string"
        name: "--demultiplex_mode"
        description: |
          Enable demultiplexing and set the mode for it.
          With mode 'unique_dual', adapters from the first and second read are used,
          and the indexes from the reads are only used in pairs. This implies
          --pair_adapters.
          Enabling mode 'combinatorial_dual' allows all combinations of the sets of indexes
          on R1 and R2. It is necessary to write each read pair to an output
          file depending on the adapters found on both R1 and R2.
          Mode 'single', uses indexes or barcodes located at the 5'
          end of the R1 read (single).
        info: null
        required: false
        choices:
          - "single"
          - "unique_dual"
          - "combinatorial_dual"
        direction: "input"
        multiple: false
        multiple_sep: ";"

  - name: "Read modifications"
    arguments:
      - type: "integer"
        name: "--cut"
        alternatives:
          - "-u"
        description: |
          Remove LEN bases from each read (or R1 if paired; use --cut_r2
          option for R2). If LEN is positive, remove bases from the
          beginning. If LEN is negative, remove bases from the end.
          Can be used twice if LENs have different signs. Applied
          *before* adapter trimming.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "integer"
        name: "--cut_r2"
        description: |
          Remove LEN bases from each read (for R2). If LEN is positive, remove bases from the
          beginning. If LEN is negative, remove bases from the end.
          Can be used twice if LENs have different signs. Applied
          *before* adapter trimming.
        info: null
        required: false
        direction: "input"
        multiple: true
        multiple_sep: ";"
      - type: "string"
        name: "--nextseq_trim"
        description: |
          NextSeq-specific quality trimming (each read). Trims also
          dark cycles appearing as high-quality G bases.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--quality_cutoff"
        alternatives:
          - "-q"
        description: |
          Trim low-quality bases from 5' and/or 3' ends of each read
          before adapter removal. Applied to both reads if data is
          paired. If one value is given, only the 3' end is trimmed.
          If two comma-separated cutoffs are given, the 5' end is
          trimmed with the first cutoff, the 3' end with the second.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--quality_cutoff_r2"
        alternatives:
          - "-Q"
        description: "Quality-trimming cutoff for R2. Default: same as for R1\n"
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "integer"
        name: "--quality_base"
        description: |
          Assume that quality values in FASTQ are encoded as
          ascii(quality + N). This needs to be set to 64 for some
          old Illumina FASTQ files. The default is 33.
        info: null
        example:
          - 33
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--poly_a"
        description: "Trim poly-A tails"
        info: null
        direction: "input"
      - type: "integer"
        name: "--length"
        alternatives:
          - "-l"
        description: |
          Shorten reads to LENGTH. Positive values remove bases at
          the end while negative ones remove bases at the beginning.
          This and the following modifications are applied after
          adapter trimming.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--trim_n"
        description: "Trim N's on ends of reads."
        info: null
        direction: "input"
      - type: "string"
        name: "--length_tag"
        # The example below uses this component's own argument name
        # (--length_tag); it previously read --length-tag.
        description: |
          Search for TAG followed by a decimal number in the
          description field of the read. Replace the decimal number
          with the correct length of the trimmed read. For example,
          use --length_tag 'length=' to correct fields like
          'length=123'.
        info: null
        example:
          - "length="
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--strip_suffix"
        # NOTE(review): the description says "Can be given multiple times" but
        # `multiple` is false. Confirm whether script.sh handles repeated
        # values before changing either the flag or the text.
        description: |
          Remove this suffix from read names if present. Can be
          given multiple times.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--prefix"
        alternatives:
          - "-x"
        description: |
          Add this prefix to read names. Use {name} to insert the
          name of the matching adapter.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--suffix"
        alternatives:
          - "-y"
        description: "Add this suffix to read names; can also include {name}\n"
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--rename"
        description: |
          Rename reads using TEMPLATE containing variables such as
          {id}, {adapter_name} etc. (see documentation)
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--zero_cap"
        alternatives:
          - "-z"
        description: "Change negative quality values to zero."
        info: null
        direction: "input"

  - name: "Filtering of processed reads"
    description: |
      Filters are applied after above read modifications. Paired-end reads are
      always discarded pairwise (see also --pair_filter).
    arguments:
      - type: "string"
        name: "--minimum_length"
        alternatives:
          - "-m"
        description: |
          Discard reads shorter than LEN. Default is 0.
          When trimming paired-end reads, the minimum lengths for R1 and R2 can be specified separately by separating them with a colon (:).
          If the colon syntax is not used, the same minimum length applies to both reads, as discussed above.
          Also, one of the values can be omitted to impose no restrictions.
          For example, with -m 17:, the length of R1 must be at least 17, but the length of R2 is ignored.
        info: null
        example:
          - "0"
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--maximum_length"
        alternatives:
          - "-M"
        description: |
          Discard reads longer than LEN. Default: no limit.
          For paired reads, see the remark for --minimum_length
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "string"
        name: "--max_n"
        description: |
          Discard reads with more than COUNT 'N' bases. If COUNT is
          a number between 0 and 1, it is interpreted as a fraction
          of the read length.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      # Expected-error thresholds are fractional quantities, so they are typed
      # as `double` (previously `long`, which cannot hold a value such as 0.5).
      - type: "double"
        name: "--max_expected_errors"
        alternatives:
          - "--max_ee"
        description: |
          Discard reads whose expected number of errors (computed
          from quality values) exceeds ERRORS.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "double"
        name: "--max_average_error_rate"
        alternatives:
          - "--max_aer"
        description: |
          as --max_expected_errors (see above), but divided by
          length to account for reads of varying length.
        info: null
        required: false
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--discard_trimmed"
        alternatives:
          - "--discard"
        description: |
          Discard reads that contain an adapter. Use also -O to
          avoid discarding too many randomly matching reads.
        info: null
        direction: "input"
      - type: "boolean_true"
        name: "--discard_untrimmed"
        alternatives:
          - "--trimmed_only"
        description: "Discard reads that do not contain an adapter.\n"
        info: null
        direction: "input"
      - type: "boolean_true"
        name: "--discard_casava"
        description: |
          Discard reads that did not pass CASAVA filtering (header
          has :Y:).
        info: null
        direction: "input"

  - name: "Output parameters"
    arguments:
      - type: "string"
        name: "--report"
        description: "Which type of report to print: 'full' (default) or 'minimal'.\n"
        info: null
        example:
          - "full"
        required: false
        choices:
          - "full"
          - "minimal"
        direction: "input"
        multiple: false
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--json"
        # NOTE(review): this is a boolean flag, so no file path can be passed;
        # the description previously said "to this file". The report location
        # is presumably chosen by script.sh - confirm and document it here.
        description: "Write report in JSON format.\n"
        info: null
        direction: "input"
      - type: "file"
        name: "--output"
        description: |
          Glob pattern for matching the expected output files.
          Should include `$output_dir`.
        info: null
        example:
          - "fastq/*_001.fast[a,q]"
        must_exist: true
        create_parent: true
        required: true
        direction: "output"
        multiple: true
        multiple_sep: ";"
      - type: "boolean_true"
        name: "--fasta"
        description: "Output FASTA to standard output even on FASTQ input.\n"
        info: null
        direction: "input"
      - type: "boolean_true"
        name: "--info_file"
        description: |
          Write information about each read and its adapter matches
          into info.txt in the output directory.
          See the documentation for the file format.
        info: null
        direction: "input"

  - name: "Debug"
    arguments:
      - type: "boolean_true"
        name: "--debug"
        description: "Print debug information"
        info: null
        direction: "input"

# Script that implements the component.
resources:
  - type: "bash_script"
    path: "script.sh"
    is_executable: true
description: "Cutadapt removes adapter sequences from high-throughput sequencing reads.\n"

# Script that tests the component.
test_resources:
  - type: "bash_script"
    path: "test.sh"
    is_executable: true
info: null
status: "enabled"
scope:
  image: "public"
  target: "public"
requirements:
  commands:
    - "ps"
keywords:
  - "RNA-seq"
  - "scRNA-seq"
  - "high-throughput"
license: "MIT"
references:
  doi:
    - "10.14806/ej.17.1.200"
links:
  repository: "https://github.com/marcelm/cutadapt"
  homepage: "https://cutadapt.readthedocs.io"
  documentation: "https://cutadapt.readthedocs.io"

runners:
  - type: "executable"
    id: "executable"
    docker_setup_strategy: "ifneedbepullelsecachedbuild"
  - type: "nextflow"
    id: "nextflow"
    directives:
      tag: "$id"
    auto:
      simplifyInput: true
      simplifyOutput: false
      transcript: false
      publish: false
    config:
      # Named resource labels: decimal (gb/tb) and binary (gib/tib) memory
      # sizes expressed in bytes, followed by CPU counts.
      labels:
        mem1gb: "memory = 1000000000.B"
        mem2gb: "memory = 2000000000.B"
        mem5gb: "memory = 5000000000.B"
        mem10gb: "memory = 10000000000.B"
        mem20gb: "memory = 20000000000.B"
        mem50gb: "memory = 50000000000.B"
        mem100gb: "memory = 100000000000.B"
        mem200gb: "memory = 200000000000.B"
        mem500gb: "memory = 500000000000.B"
        mem1tb: "memory = 1000000000000.B"
        mem2tb: "memory = 2000000000000.B"
        mem5tb: "memory = 5000000000000.B"
        mem10tb: "memory = 10000000000000.B"
        mem20tb: "memory = 20000000000000.B"
        mem50tb: "memory = 50000000000000.B"
        mem100tb: "memory = 100000000000000.B"
        mem200tb: "memory = 200000000000000.B"
        mem500tb: "memory = 500000000000000.B"
        mem1gib: "memory = 1073741824.B"
        mem2gib: "memory = 2147483648.B"
        mem4gib: "memory = 4294967296.B"
        mem8gib: "memory = 8589934592.B"
        mem16gib: "memory = 17179869184.B"
        mem32gib: "memory = 34359738368.B"
        mem64gib: "memory = 68719476736.B"
        mem128gib: "memory = 137438953472.B"
        mem256gib: "memory = 274877906944.B"
        mem512gib: "memory = 549755813888.B"
        mem1tib: "memory = 1099511627776.B"
        mem2tib: "memory = 2199023255552.B"
        mem4tib: "memory = 4398046511104.B"
        mem8tib: "memory = 8796093022208.B"
        mem16tib: "memory = 17592186044416.B"
        mem32tib: "memory = 35184372088832.B"
        mem64tib: "memory = 70368744177664.B"
        mem128tib: "memory = 140737488355328.B"
        mem256tib: "memory = 281474976710656.B"
        mem512tib: "memory = 562949953421312.B"
        cpu1: "cpus = 1"
        cpu2: "cpus = 2"
        cpu5: "cpus = 5"
        cpu10: "cpus = 10"
        cpu20: "cpus = 20"
        cpu50: "cpus = 50"
        cpu100: "cpus = 100"
        cpu200: "cpus = 200"
        cpu500: "cpus = 500"
        cpu1000: "cpus = 1000"
    debug: false
    container: "docker"

engines:
  - type: "docker"
    id: "docker"
    image: "python:3.12"
    target_registry: "images.viash-hub.com"
    target_tag: "main"
    namespace_separator: "/"
    setup:
      - type: "python"
        user: false
        pip:
          - "cutadapt"
        upgrade: true
      # Writes the installed cutadapt version to /var/software_versions.txt.
      - type: "docker"
        run:
          - "cutadapt --version | sed 's/\\(.*\\)/cutadapt: \"\\1\"/' > /var/software_versions.txt\n"
    entrypoint: []
    cmd: null
  - type: "native"
    id: "native"

# Build provenance: source config, target paths, Viash version and git state.
build_info:
  config: "src/cutadapt/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/cutadapt"
  executable: "target/executable/cutadapt/cutadapt"
  viash_version: "0.9.4"
  git_commit: "b0db228825f3441b4651527e8775e8fc87d06e60"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-35-gb0db228"

package_config:
  name: "biobox"
  version: "main"
  summary: "A curated collection of high-quality, standalone bioinformatics components built with [Viash](https://viash.io).\n"
  # Markdown text. The sub-bullets under "High Quality Standards" are indented
  # so that they render as a nested list.
  description: |
    `biobox` offers a suite of reliable bioinformatics components, similar to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio), but built using the [Viash](https://viash.io) framework.

    This approach emphasizes **reusability**, **reproducibility**, and adherence to **best practices**. Key features of `biobox` components include:

    * **Standalone & Nextflow Ready:** Run components directly via the command line or seamlessly integrate them into Nextflow workflows.
    * **High Quality Standards:**
        * Comprehensive documentation for components and parameters.
        * Full exposure of underlying tool arguments.
        * Containerized (Docker) for dependency management and reproducibility.
        * Unit tested for verified functionality.
  info: null
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  config_mods:
    - ".requirements.commands := ['ps']\n"
    - ".engines += { type: \"native\" }"
    - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
    - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
    - "bioinformatics"
    - "modules"
    - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"