# craftbox/src/csv2fasta/config.vsh.yaml

# Component identity and the help text describing what it does.
name: csv2fasta
description: |
  Convert two columns from a CSV file to FASTA entries. The CSV file can
  contain an optional header and each row (other than the header) becomes
  a single FASTA record. One of the two columns will be used as the names
  for the FASTA entries, while the other becomes the sequences. The sequences
  column must only contain characters that are valid IUPAC notation for
  nucleotides or a group thereof (wildcard characters).
argument_groups:
  - name: Inputs
    arguments:
      # The CSV file whose rows are converted to FASTA records.
      - name: --input
        type: file
        direction: input
        example: barcodes.csv
        description: CSV file to be processed.
        required: true
      # Flag (boolean_true): treat the first line of the CSV as a header.
      - name: --header
        type: boolean_true
        description: |
          Parse the first line of the CSV file as a header.
  - name: "CSV dialect options"
    description: |
      Options that can be used to override the automatically detected
      dialect of the CSV file.
    arguments:
      # Each option replaces one property of the auto-detected CSV dialect.
      - name: --delimiter
        type: string
        description: |
          Override the column delimiter character.
      - name: --quote_character
        type: string
        description: |
          Override the character used to denote the start and end of a quoted item.
  - name: "CSV column arguments"
    description: |
      Parameters for the selection of columns from the CSV file.
      Only required when your CSV file contains more than 2 columns,
      otherwise the first column will be used for the FASTA header
      and the second for the FASTA nucleotide sequences. This default
      can still be overridden by using the options below.
    arguments:
      # Select columns by header name; per the descriptions these imply
      # 'header' and exclude the matching *_index option.
      - name: --sequence_column
        type: string
        description: |
          Name of the column containing the sequences. Implies 'header'.
          Cannot be used together with 'sequence_column_index'.
        required: false
      - name: --name_column
        type: string
        description: |
          Name of the column describing the FASTA headers. Implies 'header'.
          Cannot be used together with 'name_column_index'.
        required: false
      # Select columns by zero-based position instead of by name.
      - name: --sequence_column_index
        type: integer
        min: 0
        description: |
          Index of the column to use as the FASTA sequences, counted from the left and
          starting from 0. Cannot be used in combination with the 'sequence_column' argument.
        required: false
      - name: --name_column_index
        type: integer
        min: 0
        description: |
          Index of the column to use as the FASTA headers, counted from the left and
          starting from 0. Cannot be used in combination with 'name_column'.
        required: false
  - name: Outputs
    arguments:
      # Destination file for the generated FASTA records.
      - name: --output
        type: file
        direction: output
        example: barcodes.fasta
        description: Output fasta file.

# Script that implements the component (path as written in this config).
resources:
  - type: python_script
    path: script.py
# Test script for the component; see test_setup under engines for its packages.
test_resources:
  - type: python_script
    path: test_csv2fasta.py

# Container environment the component is built and run in.
engines:
  - type: docker
    # NOTE(review): `python:slim` is a floating tag; consider pinning a
    # specific Python version for reproducible builds — confirm with project policy.
    image: python:slim
    setup:
      # System packages installed via apt.
      # NOTE(review): procps is presumably needed by the Nextflow runner
      # (process metrics) — confirm.
      - type: apt
        packages:
          - procps
      # Python packages installed for the component itself.
      - type: python
        packages:
          - dnaio
    # Extra Python packages installed only for testing.
    test_setup:
      - type: python
        packages:
          - pytest
          - viashpy

# Targets the component is built for: a standalone executable and a Nextflow module.
runners:
  - type: executable
  - type: nextflow