Files
craftbox/src/csv2fasta/config.vsh.yaml
CI 6a030faa77 Build branch v0.1 with version v0.1 (373be82)
Build pipeline: viash-hub.craftbox.v0.1-4f4mc

Source commit: 373be82129

Source message: Bump viash to 0.9.0-RC7 (#6)
2024-09-12 12:29:02 +00:00

102 lines
3.2 KiB
YAML

# Component identifier.
name: csv2fasta
# User-facing summary of the component. Written as a literal block scalar
# (`|`), so the line breaks below are kept as-is; the body must be indented
# deeper than the `description` key to belong to it.
description: |
  Convert two columns from a CSV file to FASTA entries. The CSV file can
  contain an optional header and each row (other than the header) becomes
  a single FASTA record. One of the two columns will be used as the names
  for the FASTA entries, while the other becomes the sequences. The sequences
  column must only contain characters that are valid IUPAC notation for
  nucleotides or a group thereof (wildcard characters).
# Command-line arguments, organised in named groups. Each group carries its
# own `arguments` list; every argument is a mapping starting at `- name:`.
argument_groups:
  # Input file and how to interpret its first line.
  - name: Inputs
    arguments:
      - name: "--input"
        type: file
        direction: input
        example: barcodes.csv
        description: CSV file to be processed.
        required: true
      # Flag argument (boolean_true): no value is passed on the command line.
      - name: "--header"
        type: boolean_true
        description: |
          Parse the first line of the CSV file as a header.

  # Optional overrides for CSV dialect detection.
  - name: "CSV dialect options"
    description: |
      Options that can be used to override the automatically detected
      dialect of the CSV file.
    arguments:
      - name: "--delimiter"
        type: string
        description: |
          Overwrite the column delimiter character.
      - name: "--quote_character"
        type: string
        description: |
          Overwrite the character used to denote the start and end of a quoted item.

  # Column selection. Per the descriptions below, each column can be chosen
  # either by name or by index, and the two forms are mutually exclusive.
  - name: "CSV column arguments"
    description: |
      Parameters for the selection of columns from the CSV file.
      Only required when your CSV file contains more than 2 columns,
      otherwise the first column will be used for the FASTA header
      and the second for the FASTA nucleotide sequences. This default
      can still be overwritten by using the options below.
    arguments:
      - name: "--sequence_column"
        type: string
        description: |
          Name of the column containing the sequences. Implies 'header'.
          Cannot be used together with 'sequence_column_index'.
        required: false
      - name: "--name_column"
        type: string
        description: |
          Name of the column describing the FASTA headers. Implies 'header'.
          Cannot be used together with 'name_column_index'.
        required: false
      # Indices are 0-based, hence the lower bound of 0.
      - name: "--sequence_column_index"
        type: integer
        min: 0
        description: |
          Index of the column to use as the FASTA sequences, counted from the left and
          starting from 0. Cannot be used in combination with the 'sequence_column' argument.
        required: false
      - name: "--name_column_index"
        type: integer
        min: 0
        description: |
          Index of the column to use as the FASTA headers, counted from the left and
          starting from 0. Cannot be used in combination with 'name_column'.
        required: false

  # Output file produced by the component.
  - name: Outputs
    arguments:
      - name: "--output"
        type: file
        example: barcodes.fasta
        direction: output
        description: Output fasta file.
# Script that implements the component; `path` belongs to the list item
# and is given relative to this config file (NOTE(review): confirm the
# base directory against the Viash docs).
resources:
  - type: python_script
    path: script.py
# Test script for the component, declared the same way as the main resource.
test_resources:
  - type: python_script
    path: test_csv2fasta.py
# Execution environment: a single Docker engine built on the python:slim
# image, with extra packages layered on top.
engines:
  - type: docker
    image: python:slim
    # Packages installed into the image.
    setup:
      # System package via apt.
      # NOTE(review): procps is presumably needed by the Nextflow runner
      # (it provides `ps`) - confirm.
      - type: apt
        packages:
          - procps
      # Python package, presumably used by script.py - confirm.
      - type: python
        packages:
          - dnaio
    # Additional Python packages listed separately under `test_setup`.
    test_setup:
      - type: python
        packages:
          - pytest
          - viashpy
# Runner types enabled for this component.
runners:
  - type: executable
  - type: nextflow