# Component identity: name, namespace and version.
name: generate_well_statistics
namespace: stats
version: main
# People credited for this component, with their roles and contact links.
authors:
- name: Dries Schaumont
  roles: [author, maintainer]
  info:
    links:
      email: dries@data-intuitive.com
      github: DriesSchaumont
      orcid: '0000-0002-4389-0440'
      linkedin: dries-schaumont
    organizations:
    - name: Data Intuitive
      href: 'https://www.data-intuitive.com'
      role: Data Scientist
- name: Marijke Van Moerbeke
  roles: [contributor]
  info:
    links:
      github: mvanmoerbeke
      orcid: '0000-0002-3097-5621'
      linkedin: marijke-van-moerbeke-84303a34
    organizations:
    - name: OpenAnalytics
      href: 'https://www.openanalytics.eu'
      role: Statistical Consultant
# Command-line arguments of the component. There is a single group holding
# one BAM input, two metadata strings, four output tables and a thread count.
# Multi-line descriptions use literal block scalars (`|`), which keep the
# embedded newlines and exactly one trailing newline.
argument_groups:
- name: Arguments
  arguments:
  # Input BAM file.
  - name: "--input"
    type: file
    description: The .bam file as returned by the mapping tool STAR.
    info: null
    example: [input.bam]
    must_exist: true
    create_parent: true
    required: false
    direction: input
    multiple: false
    multiple_sep: ";"
  # Metadata values; per their descriptions they only end up as a column in
  # the output files.
  - name: "--barcode"
    type: string
    description: |
      The barcode for the well that is being processed. Is only used to add a metadata
      column to all output files.
    info: null
    required: true
    direction: input
    multiple: false
    multiple_sep: ";"
  - name: "--well_id"
    type: string
    description: |
      ID of this well. Only used to add a metadata column to the output files.
    info: null
    required: true
    direction: input
    multiple: false
    multiple_sep: ";"
  # Output tables. Each has a default file name and is optional.
  # NOTE(review): the description below names a "UX" tag, while --umiFreqTop
  # refers to a "UB" column; confirm against script.py which tag is read.
  - name: "--processedBAMFile"
    type: file
    description: |
      Path to a .tsv file listing, per read in the BAM file,
      the value for the "CB", "UX", "GX" and "GN" tag, together with the
      chromosome to which the read was mapped.
    info: null
    default: [processedBamFile.txt]
    must_exist: true
    create_parent: true
    required: false
    direction: output
    multiple: false
    multiple_sep: ";"
  - name: "--nrReadsNrGenesPerChrom"
    type: file
    description: |
      Path to an output file that contains a .tsv formatted table describing
      per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
      column) and the number of genes on that chromosome that had at least one
      read mapped to it (NumberOfGenes).
    info: null
    default: [nrReadsNrGenesPerChrom.txt]
    must_exist: true
    create_parent: true
    required: false
    direction: output
    multiple: false
    multiple_sep: ";"
  - name: "--nrReadsNrUMIsPerCB"
    type: file
    description: |
      Path to an output file that contains a .tsv formatted table describing
      per barcode the number of UMIs (nrUMIs) and the total number of reads (NumberOfReads).
    info: null
    default: [nrReadsNrUMIsPerCB.txt]
    must_exist: true
    create_parent: true
    required: false
    direction: output
    multiple: false
    multiple_sep: ";"
  - name: "--umiFreqTop"
    type: file
    description: |
      Path to an output file that contains a .tsv formatted table describing
      per UMI (column UB) the frequency at which they occur in the reads (column
      N). Only the top 100 UMIs are included.
    info: null
    default: [umiFreqTop100.txt]
    must_exist: true
    create_parent: true
    required: false
    direction: output
    multiple: false
    multiple_sep: ";"
  # Thread count; defaults to 1 and may not go below 1.
  - name: "--threads"
    type: integer
    description: |
      Number of threads to use for decompressing BAM files.
    info: null
    default: [1]
    required: false
    min: 1
    direction: input
    multiple: false
    multiple_sep: ";"
# Files shipped with the component: the main script plus two config files
# that are copied under the given destination names.
resources:
- path: script.py
  type: python_script
  is_executable: true
- path: nextflow_labels.config
  type: file
  dest: nextflow_labels.config
- path: _viash.yaml
  type: file
  dest: _viash.yaml
# One-line summary of the component.
description: Generate summary statistics from BAM files generated by STAR solo.
# Test script and the SAM fixtures listed alongside it.
test_resources:
- path: test.py
  type: python_script
  is_executable: true
- path: test.sam
  type: file
- path: empty.sam
  type: file
# Component-level metadata: status, scope, required commands, license, links.
info: null
status: enabled
scope:
  image: public
  target: public
requirements:
  commands: [ps]
license: MIT
links:
  repository: 'https://github.com/viash-hub/htrnaseq'
# Runners: a plain executable and a Nextflow runner.
runners:
- id: executable
  type: executable
  docker_setup_strategy: ifneedbepullelsecachedbuild
- id: nextflow
  type: nextflow
  directives:
    tag: '$id'
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      # Memory labels in powers of ten (values are byte counts).
      mem1gb: 'memory = 1000000000.B'
      mem2gb: 'memory = 2000000000.B'
      mem5gb: 'memory = 5000000000.B'
      mem10gb: 'memory = 10000000000.B'
      mem20gb: 'memory = 20000000000.B'
      mem50gb: 'memory = 50000000000.B'
      mem100gb: 'memory = 100000000000.B'
      mem200gb: 'memory = 200000000000.B'
      mem500gb: 'memory = 500000000000.B'
      mem1tb: 'memory = 1000000000000.B'
      mem2tb: 'memory = 2000000000000.B'
      mem5tb: 'memory = 5000000000000.B'
      mem10tb: 'memory = 10000000000000.B'
      mem20tb: 'memory = 20000000000000.B'
      mem50tb: 'memory = 50000000000000.B'
      mem100tb: 'memory = 100000000000000.B'
      mem200tb: 'memory = 200000000000000.B'
      mem500tb: 'memory = 500000000000000.B'
      # Memory labels in powers of two (values are byte counts).
      mem1gib: 'memory = 1073741824.B'
      mem2gib: 'memory = 2147483648.B'
      mem4gib: 'memory = 4294967296.B'
      mem8gib: 'memory = 8589934592.B'
      mem16gib: 'memory = 17179869184.B'
      mem32gib: 'memory = 34359738368.B'
      mem64gib: 'memory = 68719476736.B'
      mem128gib: 'memory = 137438953472.B'
      mem256gib: 'memory = 274877906944.B'
      mem512gib: 'memory = 549755813888.B'
      mem1tib: 'memory = 1099511627776.B'
      mem2tib: 'memory = 2199023255552.B'
      mem4tib: 'memory = 4398046511104.B'
      mem8tib: 'memory = 8796093022208.B'
      mem16tib: 'memory = 17592186044416.B'
      mem32tib: 'memory = 35184372088832.B'
      mem64tib: 'memory = 70368744177664.B'
      mem128tib: 'memory = 140737488355328.B'
      mem256tib: 'memory = 281474976710656.B'
      mem512tib: 'memory = 562949953421312.B'
      # CPU-count labels.
      cpu1: 'cpus = 1'
      cpu2: 'cpus = 2'
      cpu5: 'cpus = 5'
      cpu10: 'cpus = 10'
      cpu20: 'cpus = 20'
      cpu50: 'cpus = 50'
      cpu100: 'cpus = 100'
      cpu200: 'cpus = 200'
      cpu500: 'cpus = 500'
      cpu1000: 'cpus = 1000'
    # Extra config lines; this one pulls in the labels file listed under
    # resources.
    script:
    - 'includeConfig("nextflow_labels.config")'
  debug: false
  container: docker
# Engines: a Docker image built on python:3.13-trixie, and a native engine.
engines:
- id: docker
  type: docker
  image: 'python:3.13-trixie'
  target_registry: images.viash-hub.com
  target_tag: main
  namespace_separator: '/'
  # Setup steps, in order: apt packages first, then Python packages.
  setup:
  - type: apt
    packages: [procps]
    interactive: false
  - type: python
    user: false
    packages: [pysam, pandas]
    upgrade: true
  # Additional Python package installed for the test setup.
  test_setup:
  - type: python
    user: false
    packages: [viashpy]
    upgrade: true
  entrypoint: []
  cmd: null
- id: native
  type: native
# Build record: source config, runner/engine used, output location, and the
# viash version and git revision recorded for the build.
build_info:
  config: src/stats/generate_well_statistics/config.vsh.yaml
  runner: executable
  engine: 'docker|native'
  output: target/executable/stats/generate_well_statistics
  executable: target/executable/stats/generate_well_statistics/generate_well_statistics
  viash_version: '0.9.4'
  git_commit: '92755e84e7e2b17cc44664388d6fbb444a26cffd'
  git_remote: 'https://github.com/viash-hub/htrnaseq'
# Package-level configuration for the `htrnaseq` package this component is
# part of: name, version, descriptive text, directories, config mods,
# keywords, license and links.
package_config:
  name: "htrnaseq"
  version: "main"
  summary: "A workflow for high-throughput RNA-seq data analyses.\n"
  # Long-form Markdown description. It is stored as an escaped double-quoted
  # scalar; a trailing backslash continues the string onto the next line
  # without inserting a newline.
  description: "This workflow is designed to process high-throughput RNA-seq data,\
    \ where every\nwell of a microarray plate is a sample. A fasta file provided as\
    \ input\ndefines the mapping between sample barcodes and wells.\n\nThe workflow\
    \ is built in a modular fashion, where most of the base functionality\nis provided\
    \ by components from [`biobox`](https://www.viash-hub.com/packages/biobox/latest)\n\
    supplemented by custom base components and workflow components in this package.\n\
    \nThe full workflow is split in two major subworkflows that can be run independently:\n\
    \n* **Well-demultiplexing:** Split the input (plate/pool level) fastq files per\
    \ well.\n* **Mapping, counting and QC:** Run per-well mapping, counting and generate\
    \ QC reports.\n\nEach of those can be started individually, or the full workflow\
    \ can be run in two ways:\n\n1. Run the [main workflow](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/htrnaseq)\
    \ \ncontaining the main functionality.\n2. Run the [(opinionated) `runner`](https://www.viash-hub.com/packages/htrnaseq/v0.3.0/components/workflows/runner)\
    \ where a\nnumber of choices (input/output structure and location) have been made.\n\
    \nInput for the workflow has to be `fastq` files (zipped or not). For bcl or other\
    \ formats, please consider running\n[demultiplex](https://www.viash-hub.com/packages/demultiplex)\
    \ first.\n"
  info:
    # Test data location (a gs:// path) and the destination directory name.
    test_resources:
    - path: "gs://viash-hub-resources/htrnaseq/v2"
      dest: "resources_test"
  viash_version: "0.9.4"
  source: "src"
  target: "target"
  # Config mods. The first entry sets `.requirements.commands`, appends the
  # `includeConfig` line to the nextflow runner's script and adds the two
  # resource files; the others add a native engine and set the docker
  # engine's target registry and tag.
  config_mods:
  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
    \ += 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
    \ dest: 'nextflow_labels.config'}\n.resources += {path: '/_viash.yaml', dest:\
    \ '_viash.yaml'}\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - "bioinformatics"
  - "sequencing"
  - "high-throughput"
  - "RNAseq"
  - "mapping"
  - "counting"
  - "pipeline"
  - "workflow"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/htrnaseq"
    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"