Build pipeline: viash-hub.biobox.main-zp6tq
Source commit: 7f8bcc2b3e
Source message: BD rhapsody sequence analysis (#96)
* wip
* fix test
* add help
* update 2.2 args
* fix bug
* extend test data
* output separate files
* analyse missing args
* tweaks to test
* fix script
* fix test
* fix test
* move small reference
* wip generate wta test data
* don't forget about umi in r1
* remove unneeded pkg
* load reference in memory just once
* fix random choices
* extend test
* add abc immunediscoverypanel
* wip abc testing code
* fix abc test; need unique instrument, run and flowcell ids for each sample
* add smk data
* add entry to changelog
* remove old test file
* adapt test for missing read
* update description
* add comment
* ensure cwl files are absolute
* Apply suggestions from code review
Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
* fix suggestion
* newer pipelines have docker requirements as a hint instead of a strict requirement
* rename str to content
* remove deleted resources
* fix containers
* fix script
* fix suggestion
* fix suggestion...
* fix test
* fix component name
* fix test
* apply suggestions
* fix test
* added note
* fix changelog
* fix changelog again
* splitting hairs here
---------
Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
1143 lines
35 KiB
YAML
1143 lines
35 KiB
YAML
name: "bd_rhapsody_sequence_analysis"
|
|
namespace: "bd_rhapsody"
|
|
version: "main"
|
|
authors:
|
|
- name: "Robrecht Cannoodt"
|
|
roles:
|
|
- "author"
|
|
- "maintainer"
|
|
info:
|
|
links:
|
|
email: "robrecht@data-intuitive.com"
|
|
github: "rcannood"
|
|
orcid: "0000-0003-3641-729X"
|
|
linkedin: "robrechtcannoodt"
|
|
organizations:
|
|
- name: "Data Intuitive"
|
|
href: "https://www.data-intuitive.com"
|
|
role: "Data Science Engineer"
|
|
- name: "Open Problems"
|
|
href: "https://openproblems.bio"
|
|
role: "Core Member"
|
|
- name: "Weiwei Schultz"
|
|
roles:
|
|
- "contributor"
|
|
info:
|
|
organizations:
|
|
- name: "Janssen R&D US"
|
|
role: "Associate Director Data Sciences"
|
|
argument_groups:
|
|
- name: "Inputs"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--reads"
|
|
description: "Reads (optional) - Path to your FASTQ.GZ formatted read files from\
|
|
\ libraries that may include:\n\n- WTA mRNA\n- Targeted mRNA\n- AbSeq\n- Sample\
|
|
\ Multiplexing\n- VDJ\n\nYou may specify as many R1/R2 read pairs as you want.\n"
|
|
info:
|
|
config_key: "Reads"
|
|
example:
|
|
- "WTALibrary_S1_L001_R1_001.fastq.gz"
|
|
- "WTALibrary_S1_L001_R2_001.fastq.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--reads_atac"
|
|
description: "Path to your FASTQ.GZ formatted read files from ATAC-Seq libraries.\n\
|
|
You may specify as many R1/R2/I2 files as you want.\n"
|
|
info:
|
|
config_key: "Reads_ATAC"
|
|
example:
|
|
- "ATACLibrary_S2_L001_R1_001.fastq.gz"
|
|
- "ATACLibrary_S2_L001_R2_001.fastq.gz"
|
|
- "ATACLibrary_S2_L001_I2_001.fastq.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- name: "References"
|
|
description: "Assay type will be inferred from the provided reference(s).\nDo not\
|
|
\ provide both reference_archive and targeted_reference at the same time.\n\n\
|
|
Valid reference input combinations:\n - reference_archive: WTA only\n - reference_archive\
|
|
\ & abseq_reference: WTA + AbSeq\n - reference_archive & supplemental_reference:\
|
|
\ WTA + extra transgenes\n - reference_archive & abseq_reference & supplemental_reference:\
|
|
\ WTA + AbSeq + extra transgenes\n - reference_archive: WTA + ATAC or ATAC only\n\
|
|
\ - reference_archive & supplemental_reference: WTA + ATAC + extra transgenes\n\
|
|
\ - targeted_reference: Targeted only\n - targeted_reference & abseq_reference:\
|
|
\ Targeted + AbSeq\n - abseq_reference: AbSeq only\n\nThe reference_archive can\
|
|
\ be generated with the bd_rhapsody_make_reference component.\nAlternatively,\
|
|
\ BD also provides standard references which can be downloaded from these locations:\n\
|
|
\n - Human: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Human_WTA_2023-02.tar.gz\n\
|
|
\ - Mouse: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Mouse_WTA_2023-02.tar.gz\n"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--reference_archive"
|
|
description: "Path to Rhapsody WTA Reference in the tar.gz format.\n\nStructure\
|
|
\ of the reference archive:\n\n- `BD_Rhapsody_Reference_Files/`: top level folder\n\
|
|
\ - `star_index/`: sub-folder containing STAR index, that is files created\
|
|
\ with `STAR --runMode genomeGenerate`\n - GTF for gene-transcript-annotation\
|
|
\ e.g. \"gencode.v43.primary_assembly.annotation.gtf\"\n"
|
|
info:
|
|
config_key: "Reference_Archive"
|
|
example:
|
|
- "RhapRef_Human_WTA_2023-02.tar.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--targeted_reference"
|
|
description: "Path to the targeted reference file in FASTA format.\n"
|
|
info:
|
|
config_key: "Targeted_Reference"
|
|
example:
|
|
- "BD_Rhapsody_Immune_Response_Panel_Hs.fasta"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--abseq_reference"
|
|
description: "Path to the AbSeq reference file in FASTA format. Only needed if\
|
|
\ BD AbSeq Ab-Oligos are used."
|
|
info:
|
|
config_key: "AbSeq_Reference"
|
|
example:
|
|
- "AbSeq_reference.fasta"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--supplemental_reference"
|
|
alternatives:
|
|
- "-s"
|
|
description: "Path to the supplemental reference file in FASTA format. Only needed\
|
|
\ if there are additional transgene sequences to be aligned against in a WTA\
|
|
\ assay experiment."
|
|
info:
|
|
config_key: "Supplemental_Reference"
|
|
example:
|
|
- "supplemental_reference.fasta"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- name: "Outputs"
|
|
description: "Outputs for all pipeline runs"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--output_dir"
|
|
alternatives:
|
|
- "-o"
|
|
description: "The unprocessed output directory containing all the outputs from\
|
|
\ the pipeline."
|
|
info: null
|
|
example:
|
|
- "output_dir"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: true
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--output_seurat"
|
|
description: "Single-cell analysis tool inputs. Seurat (.rds) input file containing\
|
|
\ RSEC molecules data table and all cell annotation metadata."
|
|
info:
|
|
template: "[sample_name]_Seurat.rds"
|
|
example:
|
|
- "output_seurat.rds"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--output_mudata"
|
|
description: "Single-cell analysis tool inputs. Scanpy / Muon input file containing\
|
|
\ RSEC molecules data table and all cell annotation metadata."
|
|
info:
|
|
template: "[sample_name].h5mu"
|
|
example:
|
|
- "output_mudata.h5mu"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--metrics_summary"
|
|
description: "Metrics Summary. Report containing sequencing, molecules, and cell\
|
|
\ metrics."
|
|
info:
|
|
template: "[sample_name]_Metrics_Summary.csv"
|
|
example:
|
|
- "metrics_summary.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--pipeline_report"
|
|
description: "Pipeline Report. Summary report containing the results from the\
|
|
\ sequencing analysis pipeline run."
|
|
info:
|
|
template: "[sample_name]_Pipeline_Report.html"
|
|
example:
|
|
- "pipeline_report.html"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--rsec_mols_per_cell"
|
|
description: "Molecules per bioproduct per cell based on RSEC"
|
|
info:
|
|
template: "[sample_name]_RSEC_MolsPerCell_MEX.zip"
|
|
example:
|
|
- "RSEC_MolsPerCell_MEX.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--dbec_mols_per_cell"
|
|
description: "Molecules per bioproduct per cell based on DBEC. DBEC data table\
|
|
\ is only output if the experiment includes targeted mRNA or AbSeq bioproducts."
|
|
info:
|
|
template: "[sample_name]_DBEC_MolsPerCell_MEX.zip"
|
|
example:
|
|
- "DBEC_MolsPerCell_MEX.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--rsec_mols_per_cell_unfiltered"
|
|
description: "Unfiltered tables containing all cell labels with ≥10 reads."
|
|
info:
|
|
template: "[sample_name]_RSEC_MolsPerCell_Unfiltered_MEX.zip"
|
|
example:
|
|
- "RSEC_MolsPerCell_Unfiltered_MEX.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bam"
|
|
description: "Alignment file of R2 with associated R1 annotations for Bioproduct."
|
|
info:
|
|
template: "[sample_name]_Bioproduct.bam"
|
|
example:
|
|
- "BioProduct.bam"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bam_index"
|
|
description: "Index file for the alignment file."
|
|
info:
|
|
template: "[sample_name]_Bioproduct.bam.bai"
|
|
example:
|
|
- "BioProduct.bam.bai"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bioproduct_stats"
|
|
description: "Bioproduct Stats. Metrics from RSEC and DBEC Unique Molecular Identifier\
|
|
\ adjustment algorithms on a per-bioproduct basis."
|
|
info:
|
|
template: "[sample_name]_Bioproduct_Stats.csv"
|
|
example:
|
|
- "Bioproduct_Stats.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--dimred_tsne"
|
|
description: "t-SNE dimensionality reduction coordinates per cell index"
|
|
info:
|
|
template: "[sample_name]_(assay)_tSNE_coordinates.csv"
|
|
example:
|
|
- "tSNE_coordinates.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--dimred_umap"
|
|
description: "UMAP dimensionality reduction coordinates per cell index"
|
|
info:
|
|
template: "[sample_name]_(assay)_UMAP_coordinates.csv"
|
|
example:
|
|
- "UMAP_coordinates.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--immune_cell_classification"
|
|
description: "Immune Cell Classification. Cell type classification based on the\
|
|
\ expression of immune cell markers."
|
|
info:
|
|
template: "[sample_name]_(assay)_cell_type_experimental.csv"
|
|
example:
|
|
- "Immune_Cell_Classification.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Multiplex outputs"
|
|
description: "Outputs when multiplex option is selected"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--sample_tag_metrics"
|
|
description: "Sample Tag Metrics. Metrics from the sample determination algorithm."
|
|
info:
|
|
template: "[sample_name]_Sample_Tag_Metrics.csv"
|
|
example:
|
|
- "Sample_Tag_Metrics.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--sample_tag_calls"
|
|
description: "Sample Tag Calls. Assigned Sample Tag for each putative cell"
|
|
info:
|
|
template: "[sample_name]_Sample_Tag_Calls.csv"
|
|
example:
|
|
- "Sample_Tag_Calls.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--sample_tag_counts"
|
|
description: "Sample Tag Counts. Separate data tables and metric summary for cells\
|
|
\ assigned to each sample tag. Note: For putative cells that could not be assigned\
|
|
\ a specific Sample Tag, a Multiplet_and_Undetermined.zip file is also output."
|
|
info:
|
|
template: "[sample_name]_Sample_Tag[number].zip"
|
|
example:
|
|
- "Sample_Tag1.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--sample_tag_counts_unassigned"
|
|
description: "Sample Tag Counts Unassigned. Data table and metric summary for\
|
|
\ cells that could not be assigned a specific Sample Tag."
|
|
info:
|
|
template: "[sample_name]_Multiplet_and_Undetermined.zip"
|
|
example:
|
|
- "Multiplet_and_Undetermined.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "VDJ Outputs"
|
|
description: "Outputs when VDJ option is selected"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--vdj_metrics"
|
|
description: "VDJ Metrics. Overall metrics from the VDJ analysis."
|
|
info:
|
|
template: "[sample_name]_VDJ_Metrics.csv"
|
|
example:
|
|
- "VDJ_Metrics.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--vdj_per_cell"
|
|
description: "VDJ Per Cell. Cell specific read and molecule counts, VDJ gene segments,\
|
|
\ CDR3 sequences, paired chains, and cell type."
|
|
info:
|
|
template: "[sample_name]_VDJ_perCell.csv"
|
|
example:
|
|
- "VDJ_perCell.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--vdj_per_cell_uncorrected"
|
|
description: "VDJ Per Cell Uncorrected. Cell specific read and molecule counts,\
|
|
\ VDJ gene segments, CDR3 sequences, paired chains, and cell type."
|
|
info:
|
|
template: "[sample_name]_VDJ_perCell_uncorrected.csv"
|
|
example:
|
|
- "VDJ_perCell_uncorrected.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--vdj_dominant_contigs"
|
|
description: "VDJ Dominant Contigs. Dominant contig for each cell label chain\
|
|
\ type combination (putative cells only)."
|
|
info:
|
|
template: "[sample_name]_VDJ_Dominant_Contigs_AIRR.csv"
|
|
example:
|
|
- "VDJ_Dominant_Contigs_AIRR.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--vdj_unfiltered_contigs"
|
|
description: "VDJ Unfiltered Contigs. All contigs that were assembled and annotated\
|
|
\ successfully (all cells)."
|
|
info:
|
|
template: "[sample_name]_VDJ_Unfiltered_Contigs_AIRR.csv"
|
|
example:
|
|
- "VDJ_Unfiltered_Contigs_AIRR.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "ATAC-Seq outputs"
|
|
description: "Outputs when ATAC-Seq option is selected"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--atac_metrics"
|
|
description: "ATAC Metrics. Overall metrics from the ATAC-Seq analysis."
|
|
info:
|
|
template: "[sample_name]_ATAC_Metrics.csv"
|
|
example:
|
|
- "ATAC_Metrics.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_metrics_json"
|
|
description: "ATAC Metrics JSON. Overall metrics from the ATAC-Seq analysis in\
|
|
\ JSON format."
|
|
info:
|
|
template: "[sample_name]_ATAC_Metrics.json"
|
|
example:
|
|
- "ATAC_Metrics.json"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_fragments"
|
|
description: "ATAC Fragments. Chromosomal location, cell index, and read support\
|
|
\ for each fragment detected"
|
|
info:
|
|
template: "[sample_name]_ATAC_Fragments.bed.gz"
|
|
example:
|
|
- "ATAC_Fragments.bed.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_fragments_index"
|
|
description: "Index of ATAC Fragments."
|
|
info:
|
|
template: "[sample_name]_ATAC_Fragments.bed.gz.tbi"
|
|
example:
|
|
- "ATAC_Fragments.bed.gz.tbi"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_transposase_sites"
|
|
description: "ATAC Transposase Sites. Chromosomal location, cell index, and read\
|
|
\ support for each transposase site detected"
|
|
info:
|
|
template: "[sample_name]_ATAC_Transposase_Sites.bed.gz"
|
|
example:
|
|
- "ATAC_Transposase_Sites.bed.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_transposase_sites_index"
|
|
description: "Index of ATAC Transposase Sites."
|
|
info:
|
|
template: "[sample_name]_ATAC_Transposase_Sites.bed.gz.tbi"
|
|
example:
|
|
- "ATAC_Transposase_Sites.bed.gz.tbi"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_peaks"
|
|
description: "ATAC Peaks. Peak regions of transposase activity"
|
|
info:
|
|
template: "[sample_name]_ATAC_Peaks.bed.gz"
|
|
example:
|
|
- "ATAC_Peaks.bed.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_peaks_index"
|
|
description: "Index of ATAC Peaks."
|
|
info:
|
|
template: "[sample_name]_ATAC_Peaks.bed.gz.tbi"
|
|
example:
|
|
- "ATAC_Peaks.bed.gz.tbi"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_peak_annotation"
|
|
description: "ATAC Peak Annotation. Estimated annotation of peak-to-gene connections"
|
|
info:
|
|
template: "[sample_name]_peak_annotation.tsv.gz"
|
|
example:
|
|
- "peak_annotation.tsv.gz"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_cell_by_peak"
|
|
description: "ATAC Cell by Peak. Peak regions of transposase activity per cell"
|
|
info:
|
|
template: "[sample_name]_ATAC_Cell_by_Peak_MEX.zip"
|
|
example:
|
|
- "ATAC_Cell_by_Peak_MEX.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_cell_by_peak_unfiltered"
|
|
description: "ATAC Cell by Peak Unfiltered. Unfiltered file containing all cell\
|
|
\ labels with >=1 transposase sites in peaks."
|
|
info:
|
|
template: "[sample_name]_ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
|
|
example:
|
|
- "ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_bam"
|
|
description: "ATAC BAM. Alignment file for R1 and R2 with associated I2 annotations\
|
|
\ for ATAC-Seq. Only output if the BAM generation flag is set to true."
|
|
info:
|
|
template: "[sample_name]_ATAC.bam"
|
|
example:
|
|
- "ATAC.bam"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--atac_bam_index"
|
|
description: "Index of ATAC BAM."
|
|
info:
|
|
template: "[sample_name]_ATAC.bam.bai"
|
|
example:
|
|
- "ATAC.bam.bai"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "AbSeq Cell Calling outputs"
|
|
description: "Outputs when Cell Calling Abseq is selected"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--protein_aggregates_experimental"
|
|
description: "Protein Aggregates Experimental"
|
|
info:
|
|
template: "[sample_name]_Protein_Aggregates_Experimental.csv"
|
|
example:
|
|
- "Protein_Aggregates_Experimental.csv"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Putative Cell Calling Settings"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--cell_calling_data"
|
|
description: "Specify the dataset to be used for putative cell calling: mRNA,\
|
|
\ AbSeq, ATAC, mRNA_and_ATAC\n\nFor putative cell calling using an AbSeq dataset,\
|
|
\ please provide an AbSeq_Reference fasta file above.\n\nFor putative cell calling\
|
|
\ using an ATAC dataset, please provide a WTA+ATAC-Seq Reference_Archive file\
|
|
\ above.\n\nThe default data for putative cell calling will be determined in the\
|
|
\ following way:\n\n- If mRNA Reads and ATAC Reads exist: mRNA_and_ATAC\n- If\
|
|
\ only ATAC Reads exist: ATAC\n- Otherwise: mRNA\n"
|
|
info:
|
|
config_key: "Cell_Calling_Data"
|
|
example:
|
|
- "mRNA"
|
|
required: false
|
|
choices:
|
|
- "mRNA"
|
|
- "AbSeq"
|
|
- "ATAC"
|
|
- "mRNA_and_ATAC"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--cell_calling_bioproduct_algorithm"
|
|
description: "Specify the bioproduct algorithm to be used for putative cell calling:\
|
|
\ Basic or Refined\n\nBy default, the Basic algorithm will be used for putative\
|
|
\ cell calling.\n"
|
|
info:
|
|
config_key: "Cell_Calling_Bioproduct_Algorithm"
|
|
example:
|
|
- "Basic"
|
|
required: false
|
|
choices:
|
|
- "Basic"
|
|
- "Refined"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--cell_calling_atac_algorithm"
|
|
description: "Specify the ATAC-seq algorithm to be used for putative cell calling:\
|
|
\ Basic or Refined\n\nBy default, the Basic algorithm will be used for putative\
|
|
\ cell calling.\n"
|
|
info:
|
|
config_key: "Cell_Calling_ATAC_Algorithm"
|
|
example:
|
|
- "Basic"
|
|
required: false
|
|
choices:
|
|
- "Basic"
|
|
- "Refined"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--exact_cell_count"
|
|
description: "Set a specific number (>=1) of cells as putative, based on those\
|
|
\ with the highest error-corrected read count\n"
|
|
info:
|
|
config_key: "Exact_Cell_Count"
|
|
example:
|
|
- 10000
|
|
required: false
|
|
min: 1
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--expected_cell_count"
|
|
description: "Guide the basic putative cell calling algorithm by providing an\
|
|
\ estimate of the number of cells expected. Usually this can be the number\
|
|
\ of cells loaded into the Rhapsody cartridge. If there are multiple inflection\
|
|
\ points on the second derivative cumulative curve, this will ensure the one\
|
|
\ selected is near the expected. \n"
|
|
info:
|
|
config_key: "Expected_Cell_Count"
|
|
example:
|
|
- 20000
|
|
required: false
|
|
min: 1
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Intronic Reads Settings"
|
|
arguments:
|
|
- type: "boolean"
|
|
name: "--exclude_intronic_reads"
|
|
description: "By default, the flag is false, and reads aligned to exons and introns\
|
|
\ are considered and represented in molecule counts. When the flag is set to\
|
|
\ true, intronic reads will be excluded.\nThe value can be true or false.\n"
|
|
info:
|
|
config_key: "Exclude_Intronic_Reads"
|
|
example:
|
|
- false
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Multiplex Settings"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--sample_tags_version"
|
|
description: "Specify the version of the Sample Tags used in the run:\n\n* If\
|
|
\ Sample Tag Multiplexing was done, specify the appropriate version: human,\
|
|
\ mouse, flex, nuclei_includes_mrna, nuclei_atac_only\n* If this is an SMK +\
|
|
\ Nuclei mRNA run or an SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq) run (and not\
|
|
\ an SMK + ATAC-Seq only run), choose the \"nuclei_includes_mrna\" option.\n\
|
|
* If this is an SMK + ATAC-Seq only run (and not SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq)),\
|
|
\ choose the \"nuclei_atac_only\" option.\n"
|
|
info:
|
|
config_key: "Sample_Tags_Version"
|
|
example:
|
|
- "human"
|
|
required: false
|
|
choices:
|
|
- "human"
|
|
- "mouse"
|
|
- "flex"
|
|
- "nuclei_includes_mrna"
|
|
- "nuclei_atac_only"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--tag_names"
|
|
description: "Specify the tag number followed by '-' and the desired sample name\
|
|
\ to appear in Sample_Tag_Metrics.csv\nDo not use the special characters: &,\
|
|
\ (), [], {}, <>, ?, |\n"
|
|
info:
|
|
config_key: "Tag_Names"
|
|
example:
|
|
- "4-mySample"
|
|
- "9-myOtherSample"
|
|
- "6-alsoThisSample"
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- name: "VDJ arguments"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--vdj_version"
|
|
description: "If VDJ was done, specify the appropriate option: human, mouse, humanBCR,\
|
|
\ humanTCR, mouseBCR, mouseTCR\n"
|
|
info:
|
|
config_key: "VDJ_Version"
|
|
example:
|
|
- "human"
|
|
required: false
|
|
choices:
|
|
- "human"
|
|
- "mouse"
|
|
- "humanBCR"
|
|
- "humanTCR"
|
|
- "mouseBCR"
|
|
- "mouseTCR"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "ATAC options"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--predefined_atac_peaks"
|
|
description: "An optional BED file containing pre-established chromatin accessibility\
|
|
\ peak regions for generating the ATAC cell-by-peak matrix."
|
|
info:
|
|
config_key: "Predefined_ATAC_Peaks"
|
|
example:
|
|
- "predefined_peaks.bed"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Additional options"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--run_name"
|
|
description: "Specify a run name to use as the output file base name. Use only\
|
|
\ letters, numbers, or hyphens. Do not use special characters or spaces.\n"
|
|
info:
|
|
config_key: "Run_Name"
|
|
default:
|
|
- "sample"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean"
|
|
name: "--generate_bam"
|
|
description: "Specify whether to create the BAM file output\n"
|
|
info:
|
|
config_key: "Generate_Bam"
|
|
default:
|
|
- false
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean"
|
|
name: "--long_reads"
|
|
description: "Use STARlong (default: undefined - i.e. autodetects based on read\
|
|
\ lengths) - Specify if the STARlong aligner should be used instead of STAR.\
|
|
\ Set to true if the reads are longer than 650bp.\n"
|
|
info:
|
|
config_key: "Long_Reads"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Advanced options"
|
|
description: "NOTE: Only change these if you are really sure about what you are\
|
|
\ doing\n"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--custom_star_params"
|
|
description: "Modify STAR alignment parameters - Set this parameter to fully override\
|
|
\ default STAR mapping parameters used in the pipeline.\nFor reference this\
|
|
\ is the default that is used:\n\n Short Reads: `--outFilterScoreMinOverLread\
|
|
\ 0 --outFilterMatchNminOverLread 0 --outFilterMultimapScoreRange 0 --clip3pAdapterSeq\
|
|
\ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --seedSearchStartLmax 50 --outFilterMatchNmin\
|
|
\ 25 --limitOutSJcollapsed 2000000`\n Long Reads: Same as Short Reads + `--seedPerReadNmax\
|
|
\ 10000`\n\nThis applies to fastqs provided in the Reads user input \nDo NOT\
|
|
\ set any non-mapping related params like `--genomeDir`, `--outSAMtype`, `--outSAMunmapped`,\
|
|
\ `--readFilesIn`, `--runThreadN`, etc.\nWe use STAR version 2.7.10b\n"
|
|
info:
|
|
config_key: "Custom_STAR_Params"
|
|
example:
|
|
- "--alignIntronMax 6000 --outFilterScoreMinOverLread 0.1 --limitOutSJcollapsed\
|
|
\ 2000000"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--custom_bwa_mem2_params"
|
|
description: "Modify bwa-mem2 alignment parameters - Set this parameter to fully\
|
|
\ override bwa-mem2 mapping parameters used in the pipeline\nThe pipeline does\
|
|
\ not specify any custom mapping params to bwa-mem2 so program default values\
|
|
\ are used\nThis applies to fastqs provided in the Reads_ATAC user input \n\
|
|
Do NOT set any non-mapping related params like `-C`, `-t`, etc.\nWe use bwa-mem2\
|
|
\ version 2.2.1\n"
|
|
info:
|
|
config_key: "Custom_bwa_mem2_Params"
|
|
example:
|
|
- "-k 16 -w 200 -r"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "CWL-runner arguments"
|
|
arguments:
|
|
- type: "boolean"
|
|
name: "--parallel"
|
|
description: "Run jobs in parallel."
|
|
info: null
|
|
default:
|
|
- true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--timestamps"
|
|
description: "Add timestamps to the errors, warnings, and notifications."
|
|
info: null
|
|
direction: "input"
|
|
- name: "Undocumented arguments"
|
|
arguments:
|
|
- type: "integer"
|
|
name: "--abseq_umi"
|
|
info:
|
|
config_key: "AbSeq_UMI"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean"
|
|
name: "--target_analysis"
|
|
info:
|
|
config_key: "Target_analysis"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "double"
|
|
name: "--vdj_jgene_evalue"
|
|
description: "e-value threshold for J gene. The e-value threshold for J gene call\
|
|
\ by IgBlast/PyIR, default is set as 0.001\n"
|
|
info:
|
|
config_key: "VDJ_JGene_Evalue"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "double"
|
|
name: "--vdj_vgene_evalue"
|
|
description: "e-value threshold for V gene. The e-value threshold for V gene call\
|
|
\ by IgBlast/PyIR, default is set as 0.001\n"
|
|
info:
|
|
config_key: "VDJ_VGene_Evalue"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean"
|
|
name: "--write_filtered_reads"
|
|
info:
|
|
config_key: "Write_Filtered_Reads"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
resources:
|
|
- type: "python_script"
|
|
path: "script.py"
|
|
is_executable: true
|
|
description: "BD Rhapsody Sequence Analysis CWL pipeline v2.2.\n\nThis pipeline performs\
|
|
\ analysis of single-cell multiomic sequence read (FASTQ) data. The supported\n\
|
|
sequencing libraries are those generated by the BD Rhapsody™ assay kits, including:\
|
|
\ Whole Transcriptome\nmRNA (WTA), Targeted mRNA, AbSeq Antibody-Oligonucleotides\
|
|
\ (ABC), Single-Cell Multiplexing (SMK),\nTCR/BCR (VDJ), and ATAC-Seq.\n"
|
|
test_resources:
|
|
- type: "python_script"
|
|
path: "test.py"
|
|
is_executable: true
|
|
- type: "file"
|
|
path: "test_data"
|
|
- type: "file"
|
|
path: "helpers"
|
|
info: null
|
|
status: "enabled"
|
|
requirements:
|
|
commands:
|
|
- "ps"
|
|
keywords:
|
|
- "rna-seq"
|
|
- "single-cell"
|
|
- "multiomic"
|
|
- "atac-seq"
|
|
- "targeted"
|
|
- "abseq"
|
|
- "tcr"
|
|
- "bcr"
|
|
license: "Unknown"
|
|
links:
|
|
repository: "https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1"
|
|
documentation: "https://bd-rhapsody-bioinfo-docs.genomics.bd.com"
|
|
runners:
|
|
- type: "executable"
|
|
id: "executable"
|
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
|
- type: "nextflow"
|
|
id: "nextflow"
|
|
directives:
|
|
tag: "$id"
|
|
auto:
|
|
simplifyInput: true
|
|
simplifyOutput: false
|
|
transcript: false
|
|
publish: false
|
|
config:
|
|
labels:
|
|
mem1gb: "memory = 1000000000.B"
|
|
mem2gb: "memory = 2000000000.B"
|
|
mem5gb: "memory = 5000000000.B"
|
|
mem10gb: "memory = 10000000000.B"
|
|
mem20gb: "memory = 20000000000.B"
|
|
mem50gb: "memory = 50000000000.B"
|
|
mem100gb: "memory = 100000000000.B"
|
|
mem200gb: "memory = 200000000000.B"
|
|
mem500gb: "memory = 500000000000.B"
|
|
mem1tb: "memory = 1000000000000.B"
|
|
mem2tb: "memory = 2000000000000.B"
|
|
mem5tb: "memory = 5000000000000.B"
|
|
mem10tb: "memory = 10000000000000.B"
|
|
mem20tb: "memory = 20000000000000.B"
|
|
mem50tb: "memory = 50000000000000.B"
|
|
mem100tb: "memory = 100000000000000.B"
|
|
mem200tb: "memory = 200000000000000.B"
|
|
mem500tb: "memory = 500000000000000.B"
|
|
mem1gib: "memory = 1073741824.B"
|
|
mem2gib: "memory = 2147483648.B"
|
|
mem4gib: "memory = 4294967296.B"
|
|
mem8gib: "memory = 8589934592.B"
|
|
mem16gib: "memory = 17179869184.B"
|
|
mem32gib: "memory = 34359738368.B"
|
|
mem64gib: "memory = 68719476736.B"
|
|
mem128gib: "memory = 137438953472.B"
|
|
mem256gib: "memory = 274877906944.B"
|
|
mem512gib: "memory = 549755813888.B"
|
|
mem1tib: "memory = 1099511627776.B"
|
|
mem2tib: "memory = 2199023255552.B"
|
|
mem4tib: "memory = 4398046511104.B"
|
|
mem8tib: "memory = 8796093022208.B"
|
|
mem16tib: "memory = 17592186044416.B"
|
|
mem32tib: "memory = 35184372088832.B"
|
|
mem64tib: "memory = 70368744177664.B"
|
|
mem128tib: "memory = 140737488355328.B"
|
|
mem256tib: "memory = 281474976710656.B"
|
|
mem512tib: "memory = 562949953421312.B"
|
|
cpu1: "cpus = 1"
|
|
cpu2: "cpus = 2"
|
|
cpu5: "cpus = 5"
|
|
cpu10: "cpus = 10"
|
|
cpu20: "cpus = 20"
|
|
cpu50: "cpus = 50"
|
|
cpu100: "cpus = 100"
|
|
cpu200: "cpus = 200"
|
|
cpu500: "cpus = 500"
|
|
cpu1000: "cpus = 1000"
|
|
debug: false
|
|
container: "docker"
|
|
engines:
|
|
- type: "docker"
|
|
id: "docker"
|
|
image: "bdgenomics/rhapsody:2.2.1"
|
|
target_registry: "images.viash-hub.com"
|
|
target_tag: "main"
|
|
namespace_separator: "/"
|
|
setup:
|
|
- type: "apt"
|
|
packages:
|
|
- "procps"
|
|
- "git"
|
|
interactive: false
|
|
- type: "python"
|
|
user: false
|
|
packages:
|
|
- "cwlref-runner"
|
|
- "cwl-runner"
|
|
upgrade: true
|
|
- type: "docker"
|
|
run:
|
|
- "mkdir /var/bd_rhapsody_cwl && \\\n cd /var/bd_rhapsody_cwl && \\\n git clone\
|
|
\ https://bitbucket.org/CRSwDev/cwl.git . && \\\n git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de\n"
|
|
- type: "docker"
|
|
run:
|
|
- "VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)"
|
|
- "echo \"bdgenomics/rhapsody: \\\"$VERSION\\\"\" > /var/software_versions.txt"
|
|
test_setup:
|
|
- type: "python"
|
|
user: false
|
|
packages:
|
|
- "biopython"
|
|
- "gffutils"
|
|
upgrade: true
|
|
entrypoint: []
|
|
cmd: null
|
|
- type: "native"
|
|
id: "native"
|
|
build_info:
|
|
config: "src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml"
|
|
runner: "executable"
|
|
engine: "docker|native"
|
|
output: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis"
|
|
executable: "target/executable/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis"
|
|
viash_version: "0.9.0"
|
|
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
|
|
git_remote: "https://github.com/viash-hub/biobox"
|
|
git_tag: "v0.2.0-3-g7f8bcc2"
|
|
package_config:
|
|
name: "biobox"
|
|
version: "main"
|
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
|
info: null
|
|
viash_version: "0.9.0"
|
|
source: "src"
|
|
target: "target"
|
|
config_mods:
|
|
- ".requirements.commands := ['ps']\n"
|
|
- ".engines += { type: \"native\" }"
|
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
|
keywords:
|
|
- "bioinformatics"
|
|
- "modules"
|
|
- "sequencing"
|
|
license: "MIT"
|
|
organization: "vsh"
|
|
links:
|
|
repository: "https://github.com/viash-hub/biobox"
|
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|