# biobox/target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml

name: "bd_rhapsody_sequence_analysis"
namespace: "bd_rhapsody"
version: "main"
authors:
- name: "Robrecht Cannoodt"
  roles:
  - "author"
  - "maintainer"
  info:
    links:
      email: "robrecht@data-intuitive.com"
      github: "rcannood"
      orcid: "0000-0003-3641-729X"
      linkedin: "robrechtcannoodt"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Data Science Engineer"
    - name: "Open Problems"
      href: "https://openproblems.bio"
      role: "Core Member"
- name: "Weiwei Schultz"
  roles:
  - "contributor"
  info:
    organizations:
    - name: "Janssen R&D US"
      role: "Associate Director Data Sciences"
argument_groups:
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--reads"
    description: "Reads (optional) - Path to your FASTQ.GZ formatted read files from\
      \ libraries that may include:\n\n- WTA mRNA\n- Targeted mRNA\n- AbSeq\n- Sample\
      \ Multiplexing\n- VDJ\n\nYou may specify as many R1/R2 read pairs as you want.\n"
    info:
      config_key: "Reads"
    example:
    - "WTALibrary_S1_L001_R1_001.fastq.gz"
    - "WTALibrary_S1_L001_R2_001.fastq.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--reads_atac"
    description: "Path to your FASTQ.GZ formatted read files from ATAC-Seq libraries.\n\
      You may specify as many R1/R2/I2 files as you want.\n"
    info:
      config_key: "Reads_ATAC"
    example:
    - "ATACLibrary_S2_L001_R1_001.fastq.gz"
    - "ATACLibrary_S2_L001_R2_001.fastq.gz"
    - "ATACLibrary_S2_L001_I2_001.fastq.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "References"
  description: "Assay type will be inferred from the provided reference(s).\nDo not\
    \ provide both reference_archive and targeted_reference at the same time.\n\n\
    Valid reference input combinations:\n  - reference_archive: WTA only\n  - reference_archive\
    \ & abseq_reference: WTA + AbSeq\n  - reference_archive & supplemental_reference:\
    \ WTA + extra transgenes\n  - reference_archive & abseq_reference & supplemental_reference:\
    \ WTA + AbSeq + extra transgenes\n  - reference_archive: WTA + ATAC or ATAC only\n\
    \  - reference_archive & supplemental_reference: WTA + ATAC + extra transgenes\n\
    \  - targeted_reference: Targeted only\n  - targeted_reference & abseq_reference:\
    \ Targeted + AbSeq\n  - abseq_reference: AbSeq only\n\nThe reference_archive can\
    \ be generated with the bd_rhapsody_make_reference component.\nAlternatively,\
    \ BD also provides standard references which can be downloaded from these locations:\n\
    \n  - Human: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Human_WTA_2023-02.tar.gz\n\
    \  - Mouse: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Mouse_WTA_2023-02.tar.gz\n"
  arguments:
  - type: "file"
    name: "--reference_archive"
    description: "Path to Rhapsody WTA Reference in the tar.gz format.\n\nStructure\
      \ of the reference archive:\n\n- `BD_Rhapsody_Reference_Files/`: top level folder\n\
      \  - `star_index/`: sub-folder containing STAR index, that is files created\
      \ with `STAR --runMode genomeGenerate`\n  - GTF for gene-transcript-annotation\
      \ e.g. \"gencode.v43.primary_assembly.annotation.gtf\"\n"
    info:
      config_key: "Reference_Archive"
    example:
    - "RhapRef_Human_WTA_2023-02.tar.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--targeted_reference"
    description: "Path to the targeted reference file in FASTA format.\n"
    info:
      config_key: "Targeted_Reference"
    example:
    - "BD_Rhapsody_Immune_Response_Panel_Hs.fasta"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--abseq_reference"
    description: "Path to the AbSeq reference file in FASTA format.  Only needed if\
      \ BD AbSeq Ab-Oligos are used."
    info:
      config_key: "AbSeq_Reference"
    example:
    - "AbSeq_reference.fasta"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--supplemental_reference"
    alternatives:
    - "-s"
    description: "Path to the supplemental reference file in FASTA format.  Only needed\
      \ if there are additional transgene sequences to be aligned against in a WTA\
      \ assay experiment."
    info:
      config_key: "Supplemental_Reference"
    example:
    - "supplemental_reference.fasta"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "Outputs"
  description: "Outputs for all pipeline runs"
  arguments:
  - type: "file"
    name: "--output_dir"
    alternatives:
    - "-o"
    description: "The unprocessed output directory containing all the outputs from\
      \ the pipeline."
    info: null
    example:
    - "output_dir"
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_seurat"
    description: "Single-cell analysis tool inputs. Seurat (.rds) input file containing\
      \ RSEC molecules data table and all cell annotation metadata."
    info:
      template: "[sample_name]_Seurat.rds"
    example:
    - "output_seurat.rds"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--output_mudata"
    description: "Single-cell analysis tool inputs. Scanpy / Muon input file containing\
      \ RSEC molecules data table and all cell annotation metadata."
    info:
      template: "[sample_name].h5mu"
    example:
    - "output_mudata.h5mu"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--metrics_summary"
    description: "Metrics Summary. Report containing sequencing, molecules, and cell\
      \ metrics."
    info:
      template: "[sample_name]_Metrics_Summary.csv"
    example:
    - "metrics_summary.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--pipeline_report"
    description: "Pipeline Report. Summary report containing the results from the\
      \ sequencing analysis pipeline run."
    info:
      template: "[sample_name]_Pipeline_Report.html"
    example:
    - "pipeline_report.html"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--rsec_mols_per_cell"
    description: "Molecules per bioproduct per cell based on RSEC"
    info:
      template: "[sample_name]_RSEC_MolsPerCell_MEX.zip"
    example:
    - "RSEC_MolsPerCell_MEX.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--dbec_mols_per_cell"
    description: "Molecules per bioproduct per cell based on DBEC. DBEC data table\
      \ is only output if the experiment includes targeted mRNA or AbSeq bioproducts."
    info:
      template: "[sample_name]_DBEC_MolsPerCell_MEX.zip"
    example:
    - "DBEC_MolsPerCell_MEX.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--rsec_mols_per_cell_unfiltered"
    description: "Unfiltered tables containing all cell labels with ≥10 reads."
    info:
      template: "[sample_name]_RSEC_MolsPerCell_Unfiltered_MEX.zip"
    example:
    - "RSEC_MolsPerCell_Unfiltered_MEX.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bam"
    description: "Alignment file of R2 with associated R1 annotations for Bioproduct."
    info:
      template: "[sample_name]_Bioproduct.bam"
    example:
    - "BioProduct.bam"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bam_index"
    description: "Index file for the alignment file."
    info:
      template: "[sample_name]_Bioproduct.bam.bai"
    example:
    - "BioProduct.bam.bai"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bioproduct_stats"
    description: "Bioproduct Stats. Metrics from RSEC and DBEC Unique Molecular Identifier\
      \ adjustment algorithms on a per-bioproduct basis."
    info:
      template: "[sample_name]_Bioproduct_Stats.csv"
    example:
    - "Bioproduct_Stats.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--dimred_tsne"
    description: "t-SNE dimensionality reduction coordinates per cell index"
    info:
      template: "[sample_name]_(assay)_tSNE_coordinates.csv"
    example:
    - "tSNE_coordinates.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--dimred_umap"
    description: "UMAP dimensionality reduction coordinates per cell index"
    info:
      template: "[sample_name]_(assay)_UMAP_coordinates.csv"
    example:
    - "UMAP_coordinates.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--immune_cell_classification"
    description: "Immune Cell Classification. Cell type classification based on the\
      \ expression of immune cell markers."
    info:
      template: "[sample_name]_(assay)_cell_type_experimental.csv"
    example:
    - "Immune_Cell_Classification.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "Multiplex outputs"
  description: "Outputs when multiplex option is selected"
  arguments:
  - type: "file"
    name: "--sample_tag_metrics"
    description: "Sample Tag Metrics. Metrics from the sample determination algorithm."
    info:
      template: "[sample_name]_Sample_Tag_Metrics.csv"
    example:
    - "Sample_Tag_Metrics.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--sample_tag_calls"
    description: "Sample Tag Calls. Assigned Sample Tag for each putative cell"
    info:
      template: "[sample_name]_Sample_Tag_Calls.csv"
    example:
    - "Sample_Tag_Calls.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--sample_tag_counts"
    description: "Sample Tag Counts. Separate data tables and metric summary for cells\
      \ assigned to each sample tag. Note: For putative cells that could not be assigned\
      \ a specific Sample Tag, a Multiplet_and_Undetermined.zip file is also output."
    info:
      template: "[sample_name]_Sample_Tag[number].zip"
    example:
    - "Sample_Tag1.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--sample_tag_counts_unassigned"
    description: "Sample Tag Counts Unassigned. Data table and metric summary for\
      \ cells that could not be assigned a specific Sample Tag."
    info:
      template: "[sample_name]_Multiplet_and_Undetermined.zip"
    example:
    - "Multiplet_and_Undetermined.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "VDJ Outputs"
  description: "Outputs when VDJ option is selected"
  arguments:
  - type: "file"
    name: "--vdj_metrics"
    description: "VDJ Metrics. Overall metrics from the VDJ analysis."
    info:
      template: "[sample_name]_VDJ_Metrics.csv"
    example:
    - "VDJ_Metrics.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--vdj_per_cell"
    description: "VDJ Per Cell. Cell specific read and molecule counts, VDJ gene segments,\
      \ CDR3 sequences, paired chains, and cell type."
    info:
      template: "[sample_name]_VDJ_perCell.csv"
    example:
    - "VDJ_perCell.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--vdj_per_cell_uncorrected"
    description: "VDJ Per Cell Uncorrected. Cell specific read and molecule counts,\
      \ VDJ gene segments, CDR3 sequences, paired chains, and cell type."
    info:
      template: "[sample_name]_VDJ_perCell_uncorrected.csv"
    example:
    - "VDJ_perCell_uncorrected.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--vdj_dominant_contigs"
    description: "VDJ Dominant Contigs. Dominant contig for each cell label chain\
      \ type combination (putative cells only)."
    info:
      template: "[sample_name]_VDJ_Dominant_Contigs_AIRR.csv"
    example:
    - "VDJ_Dominant_Contigs_AIRR.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--vdj_unfiltered_contigs"
    description: "VDJ Unfiltered Contigs. All contigs that were assembled and annotated\
      \ successfully (all cells)."
    info:
      template: "[sample_name]_VDJ_Unfiltered_Contigs_AIRR.csv"
    example:
    - "VDJ_Unfiltered_Contigs_AIRR.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "ATAC-Seq outputs"
  description: "Outputs when ATAC-Seq option is selected"
  arguments:
  - type: "file"
    name: "--atac_metrics"
    description: "ATAC Metrics. Overall metrics from the ATAC-Seq analysis."
    info:
      template: "[sample_name]_ATAC_Metrics.csv"
    example:
    - "ATAC_Metrics.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_metrics_json"
    description: "ATAC Metrics JSON. Overall metrics from the ATAC-Seq analysis in\
      \ JSON format."
    info:
      template: "[sample_name]_ATAC_Metrics.json"
    example:
    - "ATAC_Metrics.json"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_fragments"
    description: "ATAC Fragments. Chromosomal location, cell index, and read support\
      \ for each fragment detected"
    info:
      template: "[sample_name]_ATAC_Fragments.bed.gz"
    example:
    - "ATAC_Fragments.bed.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_fragments_index"
    description: "Index of ATAC Fragments."
    info:
      template: "[sample_name]_ATAC_Fragments.bed.gz.tbi"
    example:
    - "ATAC_Fragments.bed.gz.tbi"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_transposase_sites"
    description: "ATAC Transposase Sites. Chromosomal location, cell index, and read\
      \ support for each transposase site detected"
    info:
      template: "[sample_name]_ATAC_Transposase_Sites.bed.gz"
    example:
    - "ATAC_Transposase_Sites.bed.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_transposase_sites_index"
    description: "Index of ATAC Transposase Sites."
    info:
      template: "[sample_name]_ATAC_Transposase_Sites.bed.gz.tbi"
    example:
    - "ATAC_Transposase_Sites.bed.gz.tbi"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_peaks"
    description: "ATAC Peaks. Peak regions of transposase activity"
    info:
      template: "[sample_name]_ATAC_Peaks.bed.gz"
    example:
    - "ATAC_Peaks.bed.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_peaks_index"
    description: "Index of ATAC Peaks."
    info:
      template: "[sample_name]_ATAC_Peaks.bed.gz.tbi"
    example:
    - "ATAC_Peaks.bed.gz.tbi"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_peak_annotation"
    description: "ATAC Peak Annotation. Estimated annotation of peak-to-gene connections"
    info:
      template: "[sample_name]_peak_annotation.tsv.gz"
    example:
    - "peak_annotation.tsv.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_cell_by_peak"
    description: "ATAC Cell by Peak. Peak regions of transposase activity per cell"
    info:
      template: "[sample_name]_ATAC_Cell_by_Peak_MEX.zip"
    example:
    - "ATAC_Cell_by_Peak_MEX.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_cell_by_peak_unfiltered"
    description: "ATAC Cell by Peak Unfiltered. Unfiltered file containing all cell\
      \ labels with >=1 transposase sites in peaks."
    info:
      template: "[sample_name]_ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
    example:
    - "ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_bam"
    description: "ATAC BAM. Alignment file for R1 and R2 with associated I2 annotations\
      \ for ATAC-Seq. Only output if the BAM generation flag is set to true."
    info:
      template: "[sample_name]_ATAC.bam"
    example:
    - "ATAC.bam"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--atac_bam_index"
    description: "Index of ATAC BAM."
    info:
      template: "[sample_name]_ATAC.bam.bai"
    example:
    - "ATAC.bam.bai"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "AbSeq Cell Calling outputs"
  description: "Outputs when Cell Calling AbSeq is selected"
  arguments:
  - type: "file"
    name: "--protein_aggregates_experimental"
    description: "Protein Aggregates Experimental"
    info:
      template: "[sample_name]_Protein_Aggregates_Experimental.csv"
    example:
    - "Protein_Aggregates_Experimental.csv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
- name: "Putative Cell Calling Settings"
  arguments:
  - type: "string"
    name: "--cell_calling_data"
    description: "Specify the dataset to be used for putative cell calling: mRNA,\
      \ AbSeq, ATAC, mRNA_and_ATAC\n\nFor putative cell calling using an AbSeq dataset,\
      \ please provide an AbSeq_Reference fasta file above.\n\nFor putative cell calling\
      \ using an ATAC dataset, please provide a WTA+ATAC-Seq Reference_Archive file\
      \ above.\n\nThe default data for putative cell calling will be determined in the\
      \ following way:\n\n- If mRNA Reads and ATAC Reads exist: mRNA_and_ATAC\n- If\
      \ only ATAC Reads exist: ATAC\n- Otherwise: mRNA\n"
    info:
      config_key: "Cell_Calling_Data"
    example:
    - "mRNA"
    required: false
    choices:
    - "mRNA"
    - "AbSeq"
    - "ATAC"
    - "mRNA_and_ATAC"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--cell_calling_bioproduct_algorithm"
    description: "Specify the bioproduct algorithm to be used for putative cell calling:\
      \ Basic or Refined\n\nBy default, the Basic algorithm will be used for putative\
      \ cell calling.\n"
    info:
      config_key: "Cell_Calling_Bioproduct_Algorithm"
    example:
    - "Basic"
    required: false
    choices:
    - "Basic"
    - "Refined"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--cell_calling_atac_algorithm"
    description: "Specify the ATAC-seq algorithm to be used for putative cell calling:\
      \ Basic or Refined\n\nBy default, the Basic algorithm will be used for putative\
      \ cell calling.\n"
    info:
      config_key: "Cell_Calling_ATAC_Algorithm"
    example:
    - "Basic"
    required: false
    choices:
    - "Basic"
    - "Refined"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--exact_cell_count"
    description: "Set a specific number (>=1) of cells as putative, based on those\
      \ with the highest error-corrected read count\n"
    info:
      config_key: "Exact_Cell_Count"
    example:
    - 10000
    required: false
    min: 1
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--expected_cell_count"
    description: "Guide the basic putative cell calling algorithm by providing an\
      \ estimate of the number of cells expected.  Usually this can be the number\
      \ of cells loaded into the Rhapsody cartridge.  If there are multiple inflection\
      \ points on the second derivative cumulative curve, this will ensure the one\
      \ selected is near the expected. \n"
    info:
      config_key: "Expected_Cell_Count"
    example:
    - 20000
    required: false
    min: 1
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Intronic Reads Settings"
  arguments:
  - type: "boolean"
    name: "--exclude_intronic_reads"
    description: "By default, the flag is false, and reads aligned to exons and introns\
      \ are considered and represented in molecule counts. When the flag is set to\
      \ true, intronic reads will be excluded.\nThe value can be true or false.\n"
    info:
      config_key: "Exclude_Intronic_Reads"
    example:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Multiplex Settings"
  arguments:
  - type: "string"
    name: "--sample_tags_version"
    description: "Specify the version of the Sample Tags used in the run:\n\n* If\
      \ Sample Tag Multiplexing was done, specify the appropriate version: human,\
      \ mouse, flex, nuclei_includes_mrna, nuclei_atac_only\n* If this is an SMK +\
      \ Nuclei mRNA run or an SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq) run (and not\
      \ an SMK + ATAC-Seq only run), choose the \"nuclei_includes_mrna\" option.\n\
      * If this is an SMK + ATAC-Seq only run (and not SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq)),\
      \ choose the \"nuclei_atac_only\" option.\n"
    info:
      config_key: "Sample_Tags_Version"
    example:
    - "human"
    required: false
    choices:
    - "human"
    - "mouse"
    - "flex"
    - "nuclei_includes_mrna"
    - "nuclei_atac_only"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--tag_names"
    description: "Specify the tag number followed by '-' and the desired sample name\
      \ to appear in Sample_Tag_Metrics.csv\nDo not use the special characters: &,\
      \ (), [], {},  <>, ?, |\n"
    info:
      config_key: "Tag_Names"
    example:
    - "4-mySample"
    - "9-myOtherSample"
    - "6-alsoThisSample"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "VDJ arguments"
  arguments:
  - type: "string"
    name: "--vdj_version"
    description: "If VDJ was done, specify the appropriate option: human, mouse, humanBCR,\
      \ humanTCR, mouseBCR, mouseTCR\n"
    info:
      config_key: "VDJ_Version"
    example:
    - "human"
    required: false
    choices:
    - "human"
    - "mouse"
    - "humanBCR"
    - "humanTCR"
    - "mouseBCR"
    - "mouseTCR"
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "ATAC options"
  arguments:
  - type: "file"
    name: "--predefined_atac_peaks"
    description: "An optional BED file containing pre-established chromatin accessibility\
      \ peak regions for generating the ATAC cell-by-peak matrix."
    info:
      config_key: "Predefined_ATAC_Peaks"
    example:
    - "predefined_peaks.bed"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Additional options"
  arguments:
  - type: "string"
    name: "--run_name"
    description: "Specify a run name to use as the output file base name. Use only\
      \ letters, numbers, or hyphens. Do not use special characters or spaces.\n"
    info:
      config_key: "Run_Name"
    default:
    - "sample"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--generate_bam"
    description: "Specify whether to create the BAM file output\n"
    info:
      config_key: "Generate_Bam"
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--long_reads"
    description: "Use STARlong (default: undefined - i.e. autodetects based on read\
      \ lengths) - Specify if the STARlong aligner should be used instead of STAR.\
      \ Set to true if the reads are longer than 650bp.\n"
    info:
      config_key: "Long_Reads"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Advanced options"
  description: "NOTE: Only change these if you are really sure about what you are\
    \ doing\n"
  arguments:
  - type: "string"
    name: "--custom_star_params"
    description: "Modify STAR alignment parameters - Set this parameter to fully override\
      \ default STAR mapping parameters used in the pipeline.\nFor reference this\
      \ is the default that is used:\n\n  Short Reads: `--outFilterScoreMinOverLread\
      \ 0 --outFilterMatchNminOverLread 0 --outFilterMultimapScoreRange 0 --clip3pAdapterSeq\
      \ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --seedSearchStartLmax 50 --outFilterMatchNmin\
      \ 25 --limitOutSJcollapsed 2000000`\n  Long Reads: Same as Short Reads + `--seedPerReadNmax\
      \ 10000`\n\nThis applies to fastqs provided in the Reads user input \nDo NOT\
      \ set any non-mapping related params like `--genomeDir`, `--outSAMtype`, `--outSAMunmapped`,\
      \ `--readFilesIn`, `--runThreadN`, etc.\nWe use STAR version 2.7.10b\n"
    info:
      config_key: "Custom_STAR_Params"
    example:
    - "--alignIntronMax 6000 --outFilterScoreMinOverLread 0.1 --limitOutSJcollapsed\
      \ 2000000"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--custom_bwa_mem2_params"
    description: "Modify bwa-mem2 alignment parameters - Set this parameter to fully\
      \ override bwa-mem2 mapping parameters used in the pipeline\nThe pipeline does\
      \ not specify any custom mapping params to bwa-mem2 so program default values\
      \ are used\nThis applies to fastqs provided in the Reads_ATAC user input \n\
      Do NOT set any non-mapping related params like `-C`, `-t`, etc.\nWe use bwa-mem2\
      \ version 2.2.1\n"
    info:
      config_key: "Custom_bwa_mem2_Params"
    example:
    - "-k 16 -w 200 -r"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "CWL-runner arguments"
  arguments:
  - type: "boolean"
    name: "--parallel"
    description: "Run jobs in parallel."
    info: null
    default:
    - true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean_true"
    name: "--timestamps"
    description: "Add timestamps to the errors, warnings, and notifications."
    info: null
    direction: "input"
- name: "Undocumented arguments"
  arguments:
  - type: "integer"
    name: "--abseq_umi"
    info:
      config_key: "AbSeq_UMI"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--target_analysis"
    info:
      config_key: "Target_analysis"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--vdj_jgene_evalue"
    description: "e-value threshold for J gene. The e-value threshold for J gene call\
      \ by IgBlast/PyIR, default is set as 0.001\n"
    info:
      config_key: "VDJ_JGene_Evalue"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--vdj_vgene_evalue"
    description: "e-value threshold for V gene. The e-value threshold for V gene call\
      \ by IgBlast/PyIR, default is set as 0.001\n"
    info:
      config_key: "VDJ_VGene_Evalue"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--write_filtered_reads"
    info:
      config_key: "Write_Filtered_Reads"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
resources:
- type: "python_script"
  path: "script.py"
  is_executable: true
description: "BD Rhapsody Sequence Analysis CWL pipeline v2.2.\n\nThis pipeline performs\
  \ analysis of single-cell multiomic sequence read (FASTQ) data. The supported\n\
  sequencing libraries are those generated by the BD Rhapsody™ assay kits, including:\
  \ Whole Transcriptome\nmRNA (WTA), Targeted mRNA, AbSeq Antibody-Oligonucleotides\
  \ (ABC), Single-Cell Multiplexing (SMK),\nTCR/BCR (VDJ), and ATAC-Seq.\n"
test_resources:
- type: "python_script"
  path: "test.py"
  is_executable: true
- type: "file"
  path: "test_data"
- type: "file"
  path: "helpers"
info: null
status: "enabled"
requirements:
  commands:
  - "ps"
keywords:
- "rna-seq"
- "single-cell"
- "multiomic"
- "atac-seq"
- "targeted"
- "abseq"
- "tcr"
- "bcr"
license: "Unknown"
links:
  repository: "https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1"
  documentation: "https://bd-rhapsody-bioinfo-docs.genomics.bd.com"
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
- type: "docker"
  id: "docker"
  image: "bdgenomics/rhapsody:2.2.1"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "procps"
    - "git"
    interactive: false
  - type: "python"
    user: false
    packages:
    - "cwlref-runner"
    - "cwl-runner"
    upgrade: true
  - type: "docker"
    run:
    - "mkdir /var/bd_rhapsody_cwl && \\\n  cd /var/bd_rhapsody_cwl && \\\n  git clone\
      \ https://bitbucket.org/CRSwDev/cwl.git . && \\\n  git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de\n"
  - type: "docker"
    run:
    - "VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)"
    - "echo \"bdgenomics/rhapsody: \\\"$VERSION\\\"\" > /var/software_versions.txt"
  test_setup:
  - type: "python"
    user: false
    packages:
    - "biopython"
    - "gffutils"
    upgrade: true
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
build_info:
  config: "src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis"
  executable: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis"
  viash_version: "0.9.0"
  git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
  name: "biobox"
  version: "main"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"