# biobox/target/executable/busco/busco_run/.config.vsh.yaml

# Component identity: the `busco_run` component inside the `busco` namespace.
name: 'busco_run'
namespace: 'busco'
version: 'main'
# People credited for this component, with their roles, contact links and
# organizational affiliation.
authors:
- name: 'Dorien Roosen'
  roles: ['author', 'maintainer']
  info:
    links:
      email: 'dorien@data-intuitive.com'
      github: 'dorien-er'
      linkedin: 'dorien-roosen'
    organizations:
    - name: 'Data Intuitive'
      href: 'https://www.data-intuitive.com'
      role: 'Data Scientist'
# Command-line arguments of the component, organised into named groups.
argument_groups:
# Group "Inputs": the sequence input, the analysis mode and the lineage dataset.
- name: 'Inputs'
  arguments:
  # --input / -i: required input path; flagged as must_exist.
  - type: 'file'
    name: '--input'
    alternatives: ['-i']
    description: >
      Input sequence file in FASTA format. Can be an assembled genome or
      transcriptome (DNA), or protein sequences from an annotated gene set.
      Also possible to use a path to a directory containing multiple input
      files.
    info: null
    example: ['file.fasta']
    must_exist: true
    create_parent: true
    required: true
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --mode / -m: required; restricted to the long and short mode names listed
  # under `choices`.
  - type: 'string'
    name: '--mode'
    alternatives: ['-m']
    description: |
      Specify which BUSCO analysis mode to run. There are three valid modes:
        - geno or genome, for genome assemblies (DNA)
        - tran or transcriptome, for transcriptome assemblies (DNA)
        - prot or proteins, for annotated gene sets (protein)
    info: null
    example: ['proteins']
    required: true
    choices:
    - 'genome'
    - 'geno'
    - 'transcriptome'
    - 'tran'
    - 'proteins'
    - 'prot'
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --lineage_dataset / -l: optional dataset name or local dataset path.
  # The description is kept double-quoted because two of its lines end with a
  # space before the newline, which a block scalar would hide.
  - type: 'string'
    name: '--lineage_dataset'
    alternatives: ['-l']
    description: "Specify a BUSCO lineage dataset that is most closely related to the\
      \ assembly or gene set being assessed. \n\
      The full list of available datasets can be viewed\
      \ [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the\
      \ busco/busco_list_datasets component.\n\
      When unsure, the \"--auto_lineage\" flag can be set to automatically find the\
      \ optimal lineage path.\n\
      BUSCO will automatically download the requested dataset if it is not already\
      \ present in the download folder. \n\
      You can optionally provide a path to a local dataset instead of a name, e.g.\
      \ path/to/dataset.\n\
      Datasets can be downloaded using the busco/busco_download_dataset component.\n"
    info: null
    example: ['stramenopiles_odb10']
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
# Group "Outputs": five optional output paths (two short summaries, the full
# table, the missing-BUSCO list and the complete output directory).
# NOTE(review): every output carries `must_exist: true` — presumably a
# generated default; confirm it is intended for outputs.
- name: 'Outputs'
  arguments:
  # Short summary as JSON.
  - type: 'file'
    name: '--short_summary_json'
    description: |
      Output file for short summary in JSON format.
    info: null
    example: ['short_summary.json']
    must_exist: true
    create_parent: true
    required: false
    direction: 'output'
    multiple: false
    multiple_sep: ';'
  # Short summary as plain text.
  - type: 'file'
    name: '--short_summary_txt'
    description: |
      Output file for short summary in TXT format.
    info: null
    example: ['short_summary.txt']
    must_exist: true
    create_parent: true
    required: false
    direction: 'output'
    multiple: false
    multiple_sep: ';'
  # Full result table (TSV).
  - type: 'file'
    name: '--full_table'
    description: |
      Full table output in TSV format.
    info: null
    example: ['full_table.tsv']
    must_exist: true
    create_parent: true
    required: false
    direction: 'output'
    multiple: false
    multiple_sep: ';'
  # List of missing BUSCOs (TSV).
  - type: 'file'
    name: '--missing_busco_list'
    description: |
      Missing list output in TSV format.
    info: null
    example: ['missing_busco_list.tsv']
    must_exist: true
    create_parent: true
    required: false
    direction: 'output'
    multiple: false
    multiple_sep: ';'
  # The whole output directory.
  - type: 'file'
    name: '--output_dir'
    description: |
      The full output directory, if so desired.
    info: null
    example: ['output_dir']
    must_exist: true
    create_parent: true
    required: false
    direction: 'output'
    multiple: false
    multiple_sep: ';'
# Group "Resource and Run Settings": four boolean flags (type `boolean_true`)
# controlling overwriting, log verbosity, restarting and compression.
- name: 'Resource and Run Settings'
  arguments:
  # --force: overwrite existing output files.
  - type: 'boolean_true'
    name: '--force'
    description: >
      Force rewriting of existing files. Must be used when output files with
      the provided name already exist.
    info: null
    direction: 'input'
  # --quiet / -q: errors only.
  - type: 'boolean_true'
    name: '--quiet'
    alternatives: ['-q']
    description: |
      Disable the info logs, displays only errors.
    info: null
    direction: 'input'
  # --restart / -r: resume a partially completed run.
  - type: 'boolean_true'
    name: '--restart'
    alternatives: ['-r']
    description: >
      Continue a run that had already partially completed. Restarting skips
      calls to tools that have completed but performs all pre- and
      post-processing steps.
    info: null
    direction: 'input'
  # --tar: compress file-heavy subdirectories.
  - type: 'boolean_true'
    name: '--tar'
    description: |
      Compress some subdirectories with many files to save space.
    info: null
    direction: 'input'
# Group "Lineage Dataset Settings": flags for automatic lineage selection and
# an optional dataset-version string.
- name: 'Lineage Dataset Settings'
  arguments:
  # --auto_lineage: automatic lineage selection.
  - type: 'boolean_true'
    name: '--auto_lineage'
    description: >
      Run auto-lineage pipeline to automatically determine BUSCO lineage
      dataset that is most closely related to the assembly or gene set being
      assessed.
    info: null
    direction: 'input'
  # --auto_lineage_euk: automatic placement on the eukaryota tree only.
  - type: 'boolean_true'
    name: '--auto_lineage_euk'
    description: |
      Run auto-placement just on eukaryota tree to find optimal lineage path.
    info: null
    direction: 'input'
  # --auto_lineage_prok: automatic lineage selection on prokaryota trees only.
  - type: 'boolean_true'
    name: '--auto_lineage_prok'
    description: |
      Run auto_lineage just on prokaryota trees to find optimum lineage path.
    info: null
    direction: 'input'
  # --datasets_version: optional dataset version string (example: odb10).
  - type: 'string'
    name: '--datasets_version'
    description: |
      Specify the version of BUSCO datasets
    info: null
    example: ['odb10']
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
# Group "Augustus Settings": enable the Augustus gene predictor and tune it.
- name: 'Augustus Settings'
  arguments:
  # --augustus: switch the Augustus gene predictor on.
  - type: 'boolean_true'
    name: '--augustus'
    description: |
      Use augustus gene predictor for eukaryote runs.
    info: null
    direction: 'input'
  # --augustus_parameters: extra Augustus options packed into one
  # comma-separated string (see the example value).
  - type: 'string'
    name: '--augustus_parameters'
    description: |
      Additional parameters to be passed to Augustus (see Augustus documentation: https://github.com/Gaius-Augustus/Augustus/blob/master/docs/RUNNING-AUGUSTUS.md).
      Parameters should be contained within a single string, without whitespace and separated by commas.
    info: null
    example:
    - '--PARAM1=VALUE1,--PARAM2=VALUE2'
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --augustus_species: optional species name for Augustus.
  - type: 'string'
    name: '--augustus_species'
    description: |
      Specify the augustus species
    info: null
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --long: Augustus self-training optimisation; the description warns that it
  # lengthens the run considerably.
  - type: 'boolean_true'
    name: '--long'
    description: >
      Optimize Augustus self-training mode. This adds considerably to the run
      time, but can improve results for some non-model organisms.
    info: null
    direction: 'input'
# Group "BBTools Settings": two optional integers and one boolean flag.
- name: 'BBTools Settings'
  arguments:
  # --contig_break: run length of Ns treated as a contig break.
  - type: 'integer'
    name: '--contig_break'
    description: >
      Number of contiguous Ns to signify a break between contigs in BBTools
      analysis.
    info: null
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --limit: number of candidate regions considered per BUSCO.
  # NOTE(review): the description refers to BLAST output, yet the argument is
  # listed in the BBTools group — confirm the grouping is intended.
  - type: 'integer'
    name: '--limit'
    description: |
      Number of candidate regions (contig or transcript) from the BLAST output to consider per BUSCO.
      This option is only effective in pipelines using BLAST, i.e. the genome pipeline (see --augustus) or the prokaryota transcriptome pipeline.
    info: null
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --scaffold_composition: write per-scaffold ACGTN content to a file.
  - type: 'boolean_true'
    name: '--scaffold_composition'
    description: |
      Writes ACGTN content per scaffold to a file scaffold_composition.txt.
    info: null
    direction: 'input'
# Group "BLAST Settings": a single optional floating-point cutoff.
- name: 'BLAST Settings'
  arguments:
  # --e_value: E-value cutoff, typed as `double`.
  - type: 'double'
    name: '--e_value'
    description: |
      E-value cutoff for BLAST searches.
    info: null
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
# Group "Protein Gene Prediction settings": a single flag selecting Miniprot.
- name: 'Protein Gene Prediction settings'
  arguments:
  # --miniprot: use the Miniprot gene predictor.
  - type: 'boolean_true'
    name: '--miniprot'
    description: |
      Use Miniprot gene predictor.
    info: null
    direction: 'input'
# Group "MetaEuk Settings": enable Metaeuk and pass extra options to its first
# and second runs.
- name: 'MetaEuk Settings'
  arguments:
  # --metaeuk: use the Metaeuk gene predictor.
  - type: 'boolean_true'
    name: '--metaeuk'
    description: |
      Use Metaeuk gene predictor.
    info: null
    direction: 'input'
  # --metaeuk_parameters: extra options for the first Metaeuk run, as one
  # comma-separated string.
  - type: 'string'
    name: '--metaeuk_parameters'
    description: |
      Pass additional arguments to Metaeuk for the first run (see Metaeuk documentation https://github.com/soedinglab/metaeuk).
      All parameters should be contained within a single string with no white space, with each parameter separated by a comma.
    info: null
    example:
    - '--max-overlap=15,--min-exon-aa=15'
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
  # --metaeuk_rerun_parameters: same format, applied to the second Metaeuk run.
  - type: 'string'
    name: '--metaeuk_rerun_parameters'
    description: |
      Pass additional arguments to Metaeuk for the second run (see Metaeuk documentation https://github.com/soedinglab/metaeuk).
      All parameters should be contained within a single string with no white space, with each parameter separated by a comma.
    info: null
    example:
    - '--max-overlap=15,--min-exon-aa=15'
    required: false
    direction: 'input'
    multiple: false
    multiple_sep: ';'
# The component's main resource: an executable bash script.
resources:
- type: 'bash_script'
  path: 'script.sh'
  is_executable: true
# One-line summary of the component (no trailing newline in the value).
description: >-
  Assessment of genome assembly and annotation completeness with single
  copy orthologs
# Test resources: an executable test script plus a `test_data` path.
test_resources:
- type: 'bash_script'
  path: 'test.sh'
  is_executable: true
- type: 'file'
  path: 'test_data'
info: null
status: 'enabled'
# Commands listed as requirements of the component.
requirements:
  commands: ['ps']
keywords:
- 'Genome assembly'
- 'quality control'
license: 'MIT'
# Publication reference and upstream project links.
references:
  doi:
  - '10.1007/978-1-4939-9173-0_14'
links:
  repository: 'https://gitlab.com/ezlab/busco'
  homepage: 'https://busco.ezlab.org/'
  documentation: 'https://busco.ezlab.org/busco_userguide.html'
# Runner definitions: one `executable` runner and one `nextflow` runner.
runners:
- type: 'executable'
  id: 'executable'
  docker_setup_strategy: 'ifneedbepullelsecachedbuild'
- type: 'nextflow'
  id: 'nextflow'
  directives:
    tag: '$id'
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> resource setting string.
    labels:
      # Memory labels in decimal units (multiples of 10^9 bytes).
      mem1gb: 'memory = 1000000000.B'
      mem2gb: 'memory = 2000000000.B'
      mem5gb: 'memory = 5000000000.B'
      mem10gb: 'memory = 10000000000.B'
      mem20gb: 'memory = 20000000000.B'
      mem50gb: 'memory = 50000000000.B'
      mem100gb: 'memory = 100000000000.B'
      mem200gb: 'memory = 200000000000.B'
      mem500gb: 'memory = 500000000000.B'
      mem1tb: 'memory = 1000000000000.B'
      mem2tb: 'memory = 2000000000000.B'
      mem5tb: 'memory = 5000000000000.B'
      mem10tb: 'memory = 10000000000000.B'
      mem20tb: 'memory = 20000000000000.B'
      mem50tb: 'memory = 50000000000000.B'
      mem100tb: 'memory = 100000000000000.B'
      mem200tb: 'memory = 200000000000000.B'
      mem500tb: 'memory = 500000000000000.B'
      # Memory labels in binary units (multiples of 2^30 bytes).
      mem1gib: 'memory = 1073741824.B'
      mem2gib: 'memory = 2147483648.B'
      mem4gib: 'memory = 4294967296.B'
      mem8gib: 'memory = 8589934592.B'
      mem16gib: 'memory = 17179869184.B'
      mem32gib: 'memory = 34359738368.B'
      mem64gib: 'memory = 68719476736.B'
      mem128gib: 'memory = 137438953472.B'
      mem256gib: 'memory = 274877906944.B'
      mem512gib: 'memory = 549755813888.B'
      mem1tib: 'memory = 1099511627776.B'
      mem2tib: 'memory = 2199023255552.B'
      mem4tib: 'memory = 4398046511104.B'
      mem8tib: 'memory = 8796093022208.B'
      mem16tib: 'memory = 17592186044416.B'
      mem32tib: 'memory = 35184372088832.B'
      mem64tib: 'memory = 70368744177664.B'
      mem128tib: 'memory = 140737488355328.B'
      mem256tib: 'memory = 281474976710656.B'
      mem512tib: 'memory = 562949953421312.B'
      # CPU-count labels.
      cpu1: 'cpus = 1'
      cpu2: 'cpus = 2'
      cpu5: 'cpus = 5'
      cpu10: 'cpus = 10'
      cpu20: 'cpus = 20'
      cpu50: 'cpus = 50'
      cpu100: 'cpus = 100'
      cpu200: 'cpus = 200'
      cpu500: 'cpus = 500'
      cpu1000: 'cpus = 1000'
  debug: false
  container: 'docker'
# Engine definitions: a Docker engine based on a BUSCO biocontainer image, and
# a native engine.
engines:
- type: 'docker'
  id: 'docker'
  image: 'quay.io/biocontainers/busco:5.7.1--pyhdfd78af_0'
  target_registry: 'images.viash-hub.com'
  target_tag: 'main'
  namespace_separator: '/'
  setup:
  - type: 'docker'
    run:
    # Pipes `busco --version` through sed to rewrite it as `busco: "<version>"`
    # and stores the result in /var/software_versions.txt. Written as a literal
    # block so the sed expression needs no YAML escaping.
    - |
      busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: 'native'
  id: 'native'
# Build provenance: source config path, runner/engine used, output locations,
# Viash version and the git commit/remote the build came from.
build_info:
  config: 'src/busco/busco_run/config.vsh.yaml'
  runner: 'executable'
  engine: 'docker|native'
  output: 'target/executable/busco/busco_run'
  executable: 'target/executable/busco/busco_run/busco_run'
  viash_version: '0.9.0-RC6'
  # Kept quoted so the commit hash is always read as a string.
  git_commit: '766ab6c9c3059004c7c3f205621909b2d8b0b26d'
  git_remote: 'https://github.com/viash-hub/biobox'
# Settings of the enclosing `biobox` package.
package_config:
  name: 'biobox'
  version: 'main'
  description: |
    A collection of bioinformatics tools for working with sequence data.
  info: null
  viash_version: '0.9.0-RC6'
  source: 'src'
  target: 'target'
  # Config modification expressions. They mirror values seen elsewhere in this
  # file: the `ps` command requirement, the native engine, and the Docker
  # engine's target registry and tag.
  config_mods:
  - |
    .requirements.commands := ['ps']
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - 'bioinformatics'
  - 'modules'
  - 'sequencing'
  license: 'MIT'
  organization: 'vsh'
  links:
    repository: 'https://github.com/viash-hub/biobox'
    issue_tracker: 'https://github.com/viash-hub/biobox/issues'