Build branch main with version main (7f8bcc2)

Build pipeline: viash-hub.biobox.main-zp6tq

Source commit: 7f8bcc2b3e

Source message: BD rhapsody sequence analysis (#96)

* wip

* fix test

* add help

* update 2.2 args

* fix bug

* extend test data

* output separate files

* analyse missing args

* tweaks to test

* fix script

* fix test

* fix test

* move small reference

* wip generate wta test data

* don't forget about umi in r1

* remove unneeded pkg

* load reference in memory just once

* fix random choices

* extend test

* add abc immunediscoverypanel

* wip abc testing code

* fix abc test; need unique instrument, run and flowcell ids for each sample

* add smk data

* add entry to changelog

* remove old test file

* adapt test for missing read

* update description

* add comment

* ensure cwl files are absolute

* Apply suggestions from code review

Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>

* fix suggestion

* newer pipelines have docker requirements as a hint instead of a strict requirement

* rename str to content

* remove deleted resources

* fix containers

* fix script

* fix suggestion

* fix suggestion...

* fix test

* fix component name

* fix test

* apply suggestions

* fix test

* added note

* fix changelog

* fix changelog again

* splitting hairs here

---------

Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com>
This commit is contained in:
CI
2024-09-17 10:13:18 +00:00
parent f47054953e
commit 6f2f840fd9
269 changed files with 15113 additions and 1100 deletions

View File

@@ -5,6 +5,8 @@
* `agat`:
- `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100).
* `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96).
## MINOR CHANGES
* Upgrade to Viash 0.9.0.

View File

@@ -116,12 +116,11 @@ argument_groups:
resources:
- type: python_script
path: script.py
- path: make_rhap_reference_2.2.1_nodocker.cwl
test_resources:
- type: bash_script
path: test.sh
- path: test_data
- path: ../test_data
requirements:
commands: [ "cwl-runner" ]
@@ -131,12 +130,19 @@ engines:
image: bdgenomics/rhapsody:2.2.1
setup:
- type: apt
packages: [procps]
packages: [procps, git]
- type: python
packages: [cwlref-runner, cwl-runner]
- type: docker
run: |
echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt
mkdir /var/bd_rhapsody_cwl && \
cd /var/bd_rhapsody_cwl && \
git clone https://bitbucket.org/CRSwDev/cwl.git . && \
git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de
- type: docker
run:
- VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)
- 'echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt'
runners:
- type: executable

View File

@@ -1,115 +0,0 @@
requirements:
InlineJavascriptRequirement: {}
class: CommandLineTool
label: Reference Files Generator for BD Rhapsody™ Sequencing Analysis Pipeline
cwlVersion: v1.2
doc: >-
The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsody™ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n - STAR index\n - Filtered GTF file
baseCommand: run_reference_generator.sh
inputs:
Genome_fasta:
type: File[]
label: Reference Genome
doc: |-
Reference genome file in FASTA format. The BD Rhapsody™ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
inputBinding:
prefix: --reference-genome
shellQuote: false
Gtf:
type: File[]
label: Transcript Annotations
doc: |-
Transcript annotation files in GTF format. The BD Rhapsody™ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse.
inputBinding:
prefix: --gtf
shellQuote: false
Extra_sequences:
type: File[]?
label: Extra Sequences
doc: |-
Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome)
inputBinding:
prefix: --extra-sequences
shellQuote: false
Mitochondrial_Contigs:
type: string[]?
default: ["chrM", "chrMT", "M", "MT"]
label: Mitochondrial Contig Names
doc: |-
Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline.
inputBinding:
prefix: --mitochondrial-contigs
shellQuote: false
Filtering_off:
type: boolean?
label: Turn off filtering
doc: |-
By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are kept:
- protein_coding
- lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
- IG_LV_gene
- IG_V_gene
- IG_V_pseudogene
- IG_D_gene
- IG_J_gene
- IG_J_pseudogene
- IG_C_gene
- IG_C_pseudogene
- TR_V_gene
- TR_V_pseudogene
- TR_D_gene
- TR_J_gene
- TR_J_pseudogene
- TR_C_gene
If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
inputBinding:
prefix: --filtering-off
shellQuote: false
WTA_Only:
type: boolean?
label: WTA only index
doc: Build a WTA only index, otherwise builds a WTA + ATAC index.
inputBinding:
prefix: --wta-only-index
shellQuote: false
Archive_prefix:
type: string?
label: Archive Prefix
doc: |-
A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files.
inputBinding:
prefix: --archive-prefix
shellQuote: false
Extra_STAR_params:
type: string?
label: Extra STAR Params
doc: |-
Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
Example:
--limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
inputBinding:
prefix: --extra-star-params
shellQuote: true
Maximum_threads:
type: int?
label: Maximum Number of Threads
doc: |-
The maximum number of threads to use in the pipeline. By default, all available cores are used.
inputBinding:
prefix: --maximum-threads
shellQuote: false
outputs:
Archive:
type: File
doc: |-
A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsody™ Sequencing Analysis Pipeline.
id: Reference_Archive
label: Reference Files Archive
outputBinding:
glob: '*.tar.gz'

View File

@@ -83,21 +83,21 @@ def generate_config(par: dict[str, Any], meta, config) -> str:
for config_key, arg_type, par_value in config_key_value_pairs:
if arg_type == "file":
str = strip_margin(f"""\
content = strip_margin(f"""\
|{config_key}:
|""")
if isinstance(par_value, list):
for file in par_value:
str += strip_margin(f"""\
content += strip_margin(f"""\
| - class: File
| location: "{file}"
|""")
else:
str += strip_margin(f"""\
content += strip_margin(f"""\
| class: File
| location: "{par_value}"
|""")
content_list.append(str)
content_list.append(content)
else:
content_list.append(strip_margin(f"""\
|{config_key}: {par_value}
@@ -108,9 +108,9 @@ def generate_config(par: dict[str, Any], meta, config) -> str:
def get_cwl_file(meta: dict[str, Any]) -> str:
# create cwl file (if need be)
cwl_file=os.path.join(meta["resources_dir"], "make_rhap_reference_2.2.1_nodocker.cwl")
cwl_file="/var/bd_rhapsody_cwl/v2.2.1/Extra_Utilities/make_rhap_reference_2.2.1.cwl"
return cwl_file
return os.path.abspath(cwl_file)
def main(par: dict[str, Any], meta: dict[str, Any]):
config = read_config(meta["config"])

View File

@@ -1,47 +0,0 @@
#!/bin/bash

# Build a tiny reference (FASTA + GTF) for the bd_rhapsody_make_reference
# tests by cutting a short window out of the GENCODE release 41 human
# genome and annotation.
#
# Requires: seqkit, wget, zcat, awk.

# Abort on the first failing command, unset variable, or failing pipeline
# stage, so a broken download cannot silently produce truncated test data.
set -euo pipefail

TMP_DIR=/tmp/bd_rhapsody_make_reference
OUT_DIR=src/bd_rhapsody/bd_rhapsody_make_reference/test_data

# check if seqkit is installed
if ! command -v seqkit &> /dev/null; then
  echo "seqkit could not be found" >&2
  exit 1
fi

# create temporary directory and clean up on exit
mkdir -p "$TMP_DIR"
function clean_up {
  rm -rf "$TMP_DIR"
}
trap clean_up EXIT

# make sure the destination exists before redirecting output into it
mkdir -p "$OUT_DIR"

# fetch reference
ORIG_FA="$TMP_DIR/reference.fa.gz"
if [ ! -f "$ORIG_FA" ]; then
  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz \
    -O "$ORIG_FA"
fi

ORIG_GTF="$TMP_DIR/reference.gtf.gz"
if [ ! -f "$ORIG_GTF" ]; then
  wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz \
    -O "$ORIG_GTF"
fi

# create small reference
START=30000
END=31500
CHR=chr1

# subset to small region
seqkit grep -r -p "^$CHR\$" "$ORIG_FA" | \
  seqkit subseq -r "$START:$END" > "$OUT_DIR/reference_small.fa"

# Keep only features fully inside the window and shift their coordinates so
# that the window start becomes position 1. The shell values are handed to
# awk with -v instead of being interpolated into the program text.
zcat "$ORIG_GTF" | \
  awk -v FS='\t' -v OFS='\t' \
      -v chr="$CHR" -v start="$START" -v end="$END" '
    $1 == chr && $4 >= start && $5 <= end {
      $4 = $4 - start + 1;
      $5 = $5 - start + 1;
      print;
    }' > "$OUT_DIR/reference_small.gtf"

View File

@@ -0,0 +1,116 @@
# Extract arguments from CWL file and write them to arguments.yaml
#
# This script:
# - reads the CWL file
# - extracts the main workflow arguments
# - compares cwl arguments to viash config arguments
# - writes the arguments to arguments.yaml
#
# It can be used to update the arguments in the viash config after an
# update to the CWL file has been made.
#
# Dependencies: tidyverse, jsonlite, yaml, dynutils
#
# Install dependencies:
# ```R
# install.packages(c("tidyverse", "jsonlite", "yaml", "dynutils"))
# ```
#
# Usage:
# ```bash
# Rscript src/bd_rhapsody/bd_rhapsody_sequence_analysis/_process_cwl.R
# ```
library(tidyverse)
# fetch and read cwl file
# (the URL is pinned to a commit hash so reruns read the same CWL)
cwl_url <- "https://bitbucket.org/CRSwDev/cwl/raw/8feeace1141b24749ea6003f8e6ad6d3ad5232de/v2.2.1/rhapsody_pipeline_2.2.1.cwl"
lines <- read_lines(cwl_url)
# the first line is split off; the remaining lines are parsed as JSON
cwl_header <- lines[[1]]
cwl_obj <- jsonlite::fromJSON(lines[-1], simplifyVector = FALSE)

# detect main workflow arguments
gr <- dynutils::list_as_tibble(cwl_obj$`$graph`)
gr %>% print(n = 100)

main <- gr %>% filter(id == "#main")

main_inputs <- main$inputs[[1]]
input_ids <- main_inputs %>% map_chr("id") %>% gsub("^#main/", "", .)

# check whether in config
config_path <- "src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml"
config <- yaml::read_yaml(config_path)
config$all_arguments <- config$argument_groups %>% map("arguments") %>% list_flatten()
arg_names <- config$all_arguments %>% map_chr("name") %>% gsub("^--", "", .)

# arguments in cwl but not in config
setdiff(tolower(input_ids), arg_names)

# arguments in config but not in cwl
setdiff(arg_names, tolower(input_ids))

# mapping from CWL types to viash argument types
# (defined once, outside the per-input function)
type_map <- c(
  "float" = "double",
  "int" = "integer",
  "string" = "string",
  "boolean" = "boolean",
  "File" = "file",
  "enum" = "string"
)

# create arguments from main_inputs
arguments <- map(main_inputs, function(main_input) {
  input_id <- main_input$id %>% gsub("^#main/", "", .)

  # NOTE(review): this takes the second entry of `type`; it presumably relies
  # on every main input being declared as an optional `["null", <type>]` pair.
  # Confirm against the CWL file when updating to a new pipeline version.
  input_type <- main_input$type[[2]]

  # array types become `multiple: true` arguments of the item type
  if (is.list(input_type) && input_type$type == "array") {
    multiple <- TRUE
    input_type <- input_type$items
  } else {
    multiple <- FALSE
  }

  # enum types become string arguments with a fixed set of choices
  if (is.list(input_type) && input_type$type == "enum") {
    # strip the "<enum name>/" prefix from each symbol; the name is matched
    # literally so characters in it cannot be interpreted as a regex
    choices <- input_type$symbols %>%
      gsub(paste0(input_type$name, "/"), "", ., fixed = TRUE)
    input_type <- "enum"
  } else {
    choices <- NULL
  }

  # combine label and doc into one description, using whichever is available
  description <-
    if (is.null(main_input$label)) {
      main_input$doc
    } else if (is.null(main_input$doc)) {
      main_input$label
    } else {
      paste0(main_input$label, ". ", main_input$doc)
    }

  out <- list(
    name = paste0("--", tolower(input_id)),
    # unname() drops the lookup key (e.g. "File") that `[` keeps as a name,
    # so the value passed to the YAML writer is a plain string
    type = unname(type_map[input_type]),
    # TODO: use summary when viash 0.9 is released
    # summary = main_input$doc,
    # description = main_input$doc,
    description = description,
    multiple = multiple,
    choices = choices,
    info = list(
      config_key = input_id
    )
  )

  # drop fields that are not set (e.g. `choices` for non-enum inputs)
  out[!sapply(out, is.null)]
})

# write logicals as true/false rather than yes/no
yaml::write_yaml(
  arguments,
  "src/bd_rhapsody/bd_rhapsody_sequence_analysis/arguments.yaml",
  handlers = list(
    logical = yaml::verbatim_logical
  )
)

View File

@@ -0,0 +1,661 @@
name: bd_rhapsody_sequence_analysis
namespace: bd_rhapsody
description: |
BD Rhapsody Sequence Analysis CWL pipeline v2.2.
This pipeline performs analysis of single-cell multiomic sequence read (FASTQ) data. The supported
sequencing libraries are those generated by the BD Rhapsody™ assay kits, including: Whole Transcriptome
mRNA (WTA), Targeted mRNA, AbSeq Antibody-Oligonucleotides (ABC), Single-Cell Multiplexing (SMK),
TCR/BCR (VDJ), and ATAC-Seq.
keywords: [rna-seq, single-cell, multiomic, atac-seq, targeted, abseq, tcr, bcr]
links:
repository: https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1
documentation: https://bd-rhapsody-bioinfo-docs.genomics.bd.com
license: Unknown
authors:
- __merge__: /src/_authors/robrecht_cannoodt.yaml
roles: [ author, maintainer ]
- __merge__: /src/_authors/weiwei_schultz.yaml
roles: [ contributor ]
argument_groups:
- name: Inputs
arguments:
- name: "--reads"
type: file
description: |
Reads (optional) - Path to your FASTQ.GZ formatted read files from libraries that may include:
- WTA mRNA
- Targeted mRNA
- AbSeq
- Sample Multiplexing
- VDJ
You may specify as many R1/R2 read pairs as you want.
required: false
multiple: true
example:
- WTALibrary_S1_L001_R1_001.fastq.gz
- WTALibrary_S1_L001_R2_001.fastq.gz
info:
config_key: Reads
- name: "--reads_atac"
type: file
description: |
Path to your FASTQ.GZ formatted read files from ATAC-Seq libraries.
You may specify as many R1/R2/I2 files as you want.
required: false
multiple: true
example:
- ATACLibrary_S2_L001_R1_001.fastq.gz
- ATACLibrary_S2_L001_R2_001.fastq.gz
- ATACLibrary_S2_L001_I2_001.fastq.gz
info:
config_key: Reads_ATAC
- name: References
description: |
Assay type will be inferred from the provided reference(s).
Do not provide both reference_archive and targeted_reference at the same time.
Valid reference input combinations:
- reference_archive: WTA only
- reference_archive & abseq_reference: WTA + AbSeq
- reference_archive & supplemental_reference: WTA + extra transgenes
- reference_archive & abseq_reference & supplemental_reference: WTA + AbSeq + extra transgenes
- reference_archive: WTA + ATAC or ATAC only
- reference_archive & supplemental_reference: WTA + ATAC + extra transgenes
- targeted_reference: Targeted only
- targeted_reference & abseq_reference: Targeted + AbSeq
- abseq_reference: AbSeq only
The reference_archive can be generated with the bd_rhapsody_make_reference component.
Alternatively, BD also provides standard references which can be downloaded from these locations:
- Human: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Human_WTA_2023-02.tar.gz
- Mouse: https://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-WTA/Pipeline-version2.x_WTA_references/RhapRef_Mouse_WTA_2023-02.tar.gz
arguments:
- name: "--reference_archive"
type: file
description: |
Path to Rhapsody WTA Reference in the tar.gz format.
Structure of the reference archive:
- `BD_Rhapsody_Reference_Files/`: top level folder
- `star_index/`: sub-folder containing STAR index, that is files created with `STAR --runMode genomeGenerate`
- GTF for gene-transcript-annotation e.g. "gencode.v43.primary_assembly.annotation.gtf"
example: "RhapRef_Human_WTA_2023-02.tar.gz"
required: false
info:
config_key: Reference_Archive
- name: "--targeted_reference"
type: file
description: |
Path to the targeted reference file in FASTA format.
example: "BD_Rhapsody_Immune_Response_Panel_Hs.fasta"
multiple: true
info:
config_key: Targeted_Reference
- name: "--abseq_reference"
type: file
description: Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used.
example: "AbSeq_reference.fasta"
multiple: true
info:
config_key: AbSeq_Reference
- name: "--supplemental_reference"
type: file
alternatives: [-s]
description: Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences to be aligned against in a WTA assay experiment.
example: "supplemental_reference.fasta"
multiple: true
info:
config_key: Supplemental_Reference
- name: Outputs
description: Outputs for all pipeline runs
# based on https://bd-rhapsody-bioinfo-docs.genomics.bd.com/outputs/top_outputs.html
arguments:
- name: "--output_dir"
type: file
direction: output
alternatives: [-o]
description: "The unprocessed output directory containing all the outputs from the pipeline."
required: true
example: output_dir/
- name: "--output_seurat"
type: file
direction: output
description: "Single-cell analysis tool inputs. Seurat (.rds) input file containing RSEC molecules data table and all cell annotation metadata."
example: output_seurat.rds
required: false
info:
template: "[sample_name]_Seurat.rds"
- name: "--output_mudata"
type: file
direction: output
description: "Single-cell analysis tool inputs. Scanpy / Muon input file containing RSEC molecules data table and all cell annotation metadata."
example: output_mudata.h5mu
required: false
info:
template: "[sample_name].h5mu"
- name: "--metrics_summary"
type: file
direction: output
description: "Metrics Summary. Report containing sequencing, molecules, and cell metrics."
example: metrics_summary.csv
required: false
info:
template: "[sample_name]_Metrics_Summary.csv"
- name: "--pipeline_report"
type: file
direction: output
description: "Pipeline Report. Summary report containing the results from the sequencing analysis pipeline run."
example: pipeline_report.html
required: false
info:
template: "[sample_name]_Pipeline_Report.html"
- name: "--rsec_mols_per_cell"
type: file
direction: output
description: "Molecules per bioproduct per cell based on RSEC"
example: RSEC_MolsPerCell_MEX.zip
required: false
info:
template: "[sample_name]_RSEC_MolsPerCell_MEX.zip"
- name: "--dbec_mols_per_cell"
type: file
direction: output
description: "Molecules per bioproduct per cell based on DBEC. DBEC data table is only output if the experiment includes targeted mRNA or AbSeq bioproducts."
example: DBEC_MolsPerCell_MEX.zip
required: false
info:
template: "[sample_name]_DBEC_MolsPerCell_MEX.zip"
- name: "--rsec_mols_per_cell_unfiltered"
type: file
direction: output
description: "Unfiltered tables containing all cell labels with ≥10 reads."
example: RSEC_MolsPerCell_Unfiltered_MEX.zip
required: false
info:
template: "[sample_name]_RSEC_MolsPerCell_Unfiltered_MEX.zip"
- name: "--bam"
type: file
direction: output
description: "Alignment file of R2 with associated R1 annotations for Bioproduct."
example: BioProduct.bam
required: false
info:
template: "[sample_name]_Bioproduct.bam"
- name: "--bam_index"
type: file
direction: output
description: "Index file for the alignment file."
example: BioProduct.bam.bai
required: false
info:
template: "[sample_name]_Bioproduct.bam.bai"
- name: "--bioproduct_stats"
type: file
direction: output
description: "Bioproduct Stats. Metrics from RSEC and DBEC Unique Molecular Identifier adjustment algorithms on a per-bioproduct basis."
example: Bioproduct_Stats.csv
required: false
info:
template: "[sample_name]_Bioproduct_Stats.csv"
- name: "--dimred_tsne"
type: file
direction: output
description: "t-SNE dimensionality reduction coordinates per cell index"
example: tSNE_coordinates.csv
required: false
info:
template: "[sample_name]_(assay)_tSNE_coordinates.csv"
- name: "--dimred_umap"
type: file
direction: output
description: "UMAP dimensionality reduction coordinates per cell index"
example: UMAP_coordinates.csv
required: false
info:
template: "[sample_name]_(assay)_UMAP_coordinates.csv"
- name: "--immune_cell_classification"
type: file
direction: output
description: "Immune Cell Classification. Cell type classification based on the expression of immune cell markers."
example: Immune_Cell_Classification.csv
required: false
info:
template: "[sample_name]_(assay)_cell_type_experimental.csv"
- name: Multiplex outputs
description: Outputs when multiplex option is selected
arguments:
- name: "--sample_tag_metrics"
type: file
direction: output
description: "Sample Tag Metrics. Metrics from the sample determination algorithm."
example: Sample_Tag_Metrics.csv
required: false
info:
template: "[sample_name]_Sample_Tag_Metrics.csv"
- name: "--sample_tag_calls"
type: file
direction: output
description: "Sample Tag Calls. Assigned Sample Tag for each putative cell"
example: Sample_Tag_Calls.csv
required: false
info:
template: "[sample_name]_Sample_Tag_Calls.csv"
- name: "--sample_tag_counts"
type: file
direction: output
description: "Sample Tag Counts. Separate data tables and metric summary for cells assigned to each sample tag. Note: For putative cells that could not be assigned a specific Sample Tag, a Multiplet_and_Undetermined.zip file is also output."
example: Sample_Tag1.zip
required: false
multiple: true
info:
template: "[sample_name]_Sample_Tag[number].zip"
- name: "--sample_tag_counts_unassigned"
type: file
direction: output
description: "Sample Tag Counts Unassigned. Data table and metric summary for cells that could not be assigned a specific Sample Tag."
example: Multiplet_and_Undetermined.zip
required: false
info:
template: "[sample_name]_Multiplet_and_Undetermined.zip"
- name: VDJ Outputs
description: Outputs when VDJ option selected
arguments:
- name: "--vdj_metrics"
type: file
direction: output
description: "VDJ Metrics. Overall metrics from the VDJ analysis."
example: VDJ_Metrics.csv
required: false
info:
template: "[sample_name]_VDJ_Metrics.csv"
- name: "--vdj_per_cell"
type: file
direction: output
description: "VDJ Per Cell. Cell specific read and molecule counts, VDJ gene segments, CDR3 sequences, paired chains, and cell type."
example: VDJ_perCell.csv
required: false
info:
template: "[sample_name]_VDJ_perCell.csv"
- name: "--vdj_per_cell_uncorrected"
type: file
direction: output
description: "VDJ Per Cell Uncorrected. Cell specific read and molecule counts, VDJ gene segments, CDR3 sequences, paired chains, and cell type."
example: VDJ_perCell_uncorrected.csv
required: false
info:
template: "[sample_name]_VDJ_perCell_uncorrected.csv"
- name: "--vdj_dominant_contigs"
type: file
direction: output
description: "VDJ Dominant Contigs. Dominant contig for each cell label chain type combination (putative cells only)."
example: VDJ_Dominant_Contigs_AIRR.csv
required: false
info:
template: "[sample_name]_VDJ_Dominant_Contigs_AIRR.csv"
- name: "--vdj_unfiltered_contigs"
type: file
direction: output
description: "VDJ Unfiltered Contigs. All contigs that were assembled and annotated successfully (all cells)."
example: VDJ_Unfiltered_Contigs_AIRR.csv
required: false
info:
template: "[sample_name]_VDJ_Unfiltered_Contigs_AIRR.csv"
- name: "ATAC-Seq outputs"
description: Outputs when ATAC-Seq option selected
arguments:
- name: "--atac_metrics"
type: file
direction: output
description: "ATAC Metrics. Overall metrics from the ATAC-Seq analysis."
example: ATAC_Metrics.csv
required: false
info:
template: "[sample_name]_ATAC_Metrics.csv"
- name: "--atac_metrics_json"
type: file
direction: output
description: "ATAC Metrics JSON. Overall metrics from the ATAC-Seq analysis in JSON format."
example: ATAC_Metrics.json
required: false
info:
template: "[sample_name]_ATAC_Metrics.json"
- name: "--atac_fragments"
type: file
direction: output
description: "ATAC Fragments. Chromosomal location, cell index, and read support for each fragment detected"
example: ATAC_Fragments.bed.gz
required: false
info:
template: "[sample_name]_ATAC_Fragments.bed.gz"
- name: "--atac_fragments_index"
type: file
direction: output
description: "Index of ATAC Fragments."
example: ATAC_Fragments.bed.gz.tbi
required: false
info:
template: "[sample_name]_ATAC_Fragments.bed.gz.tbi"
- name: "--atac_transposase_sites"
type: file
direction: output
description: "ATAC Transposase Sites. Chromosomal location, cell index, and read support for each transposase site detected"
example: ATAC_Transposase_Sites.bed.gz
required: false
info:
template: "[sample_name]_ATAC_Transposase_Sites.bed.gz"
- name: "--atac_transposase_sites_index"
type: file
direction: output
description: "Index of ATAC Transposase Sites."
example: ATAC_Transposase_Sites.bed.gz.tbi
required: false
info:
template: "[sample_name]_ATAC_Transposase_Sites.bed.gz.tbi"
- name: "--atac_peaks"
type: file
direction: output
description: "ATAC Peaks. Peak regions of transposase activity"
example: ATAC_Peaks.bed.gz
required: false
info:
template: "[sample_name]_ATAC_Peaks.bed.gz"
- name: "--atac_peaks_index"
type: file
direction: output
description: "Index of ATAC Peaks."
example: ATAC_Peaks.bed.gz.tbi
required: false
info:
template: "[sample_name]_ATAC_Peaks.bed.gz.tbi"
- name: "--atac_peak_annotation"
type: file
direction: output
description: "ATAC Peak Annotation. Estimated annotation of peak-to-gene connections"
example: peak_annotation.tsv.gz
required: false
info:
template: "[sample_name]_peak_annotation.tsv.gz"
- name: "--atac_cell_by_peak"
type: file
direction: output
description: "ATAC Cell by Peak. Peak regions of transposase activity per cell"
example: ATAC_Cell_by_Peak_MEX.zip
required: false
info:
template: "[sample_name]_ATAC_Cell_by_Peak_MEX.zip"
- name: "--atac_cell_by_peak_unfiltered"
type: file
direction: output
description: "ATAC Cell by Peak Unfiltered. Unfiltered file containing all cell labels with >=1 transposase sites in peaks."
example: ATAC_Cell_by_Peak_Unfiltered_MEX.zip
required: false
info:
template: "[sample_name]_ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
- name: "--atac_bam"
type: file
direction: output
description: "ATAC BAM. Alignment file for R1 and R2 with associated I2 annotations for ATAC-Seq. Only output if the BAM generation flag is set to true."
example: ATAC.bam
required: false
info:
template: "[sample_name]_ATAC.bam"
- name: "--atac_bam_index"
type: file
direction: output
description: "Index of ATAC BAM."
example: ATAC.bam.bai
required: false
info:
template: "[sample_name]_ATAC.bam.bai"
- name: AbSeq Cell Calling outputs
description: Outputs when Cell Calling Abseq is selected
arguments:
- name: "--protein_aggregates_experimental"
type: file
direction: output
description: "Protein Aggregates Experimental"
example: Protein_Aggregates_Experimental.csv
required: false
info:
template: "[sample_name]_Protein_Aggregates_Experimental.csv"
- name: Putative Cell Calling Settings
arguments:
- name: "--cell_calling_data"
type: string
description: |
Specify the dataset to be used for putative cell calling: mRNA, AbSeq, ATAC, mRNA_and_ATAC
For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.
For putative cell calling using an ATAC dataset, please provide a WTA+ATAC-Seq Reference_Archive file above.
The default data for putative cell calling will be determined as follows:
- If mRNA Reads and ATAC Reads exist: mRNA_and_ATAC
- If only ATAC Reads exist: ATAC
- Otherwise: mRNA
choices: [mRNA, AbSeq, ATAC, mRNA_and_ATAC]
example: mRNA
info:
config_key: Cell_Calling_Data
- name: "--cell_calling_bioproduct_algorithm"
type: string
description: |
Specify the bioproduct algorithm to be used for putative cell calling: Basic or Refined
By default, the Basic algorithm will be used for putative cell calling.
choices: [Basic, Refined]
example: Basic
info:
config_key: Cell_Calling_Bioproduct_Algorithm
- name: "--cell_calling_atac_algorithm"
type: string
description: |
Specify the ATAC-seq algorithm to be used for putative cell calling: Basic or Refined
By default, the Basic algorithm will be used for putative cell calling.
choices: [Basic, Refined]
example: Basic
info:
config_key: Cell_Calling_ATAC_Algorithm
- name: "--exact_cell_count"
type: integer
description: |
Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count
example: 10000
min: 1
info:
config_key: Exact_Cell_Count
- name: "--expected_cell_count"
type: integer
description: |
Guide the basic putative cell calling algorithm by providing an estimate of the number of cells expected. Usually this can be the number of cells loaded into the Rhapsody cartridge. If there are multiple inflection points on the second derivative cumulative curve, this will ensure the one selected is near the expected.
example: 20000
min: 1
info:
config_key: Expected_Cell_Count
- name: Intronic Reads Settings
arguments:
- name: --exclude_intronic_reads
type: boolean
description: |
By default, the flag is false, and reads aligned to exons and introns are considered and represented in molecule counts. When the flag is set to true, intronic reads will be excluded.
The value can be true or false.
example: false
info:
config_key: Exclude_Intronic_Reads
- name: Multiplex Settings
arguments:
- name: "--sample_tags_version"
type: string
description: |
Specify the version of the Sample Tags used in the run:
* If Sample Tag Multiplexing was done, specify the appropriate version: human, mouse, flex, nuclei_includes_mrna, nuclei_atac_only
* If this is an SMK + Nuclei mRNA run or an SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq) run (and not an SMK + ATAC-Seq only run), choose the "nuclei_includes_mrna" option.
* If this is an SMK + ATAC-Seq only run (and not SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq)), choose the "nuclei_atac_only" option.
choices: [human, mouse, flex, nuclei_includes_mrna, nuclei_atac_only]
example: human
info:
config_key: Sample_Tags_Version
- name: "--tag_names"
type: string
description: |
Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv
Do not use the special characters: &, (), [], {}, <>, ?, |
multiple: true
example: [4-mySample, 9-myOtherSample, 6-alsoThisSample]
info:
config_key: Tag_Names
- name: VDJ arguments
arguments:
- name: "--vdj_version"
type: string
description: |
If VDJ was done, specify the appropriate option: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR
choices: [human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR]
example: human
info:
config_key: VDJ_Version
- name: ATAC options
arguments:
- name: "--predefined_atac_peaks"
type: file
description: An optional BED file containing pre-established chromatin accessibility peak regions for generating the ATAC cell-by-peak matrix.
example: predefined_peaks.bed
info:
config_key: Predefined_ATAC_Peaks
- name: Additional options
arguments:
- name: "--run_name"
type: string
description: |
Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces.
default: sample
info:
config_key: Run_Name
- name: "--generate_bam"
type: boolean
description: |
Specify whether to create the BAM file output
default: false
info:
config_key: Generate_Bam
- name: "--long_reads"
type: boolean
description: |
Use STARlong (default: undefined - i.e. autodetects based on read lengths) - Specify if the STARlong aligner should be used instead of STAR. Set to true if the reads are longer than 650bp.
info:
config_key: Long_Reads
- name: Advanced options
description: |
NOTE: Only change these if you are really sure about what you are doing
arguments:
- name: "--custom_star_params"
type: string
description: |
Modify STAR alignment parameters - Set this parameter to fully override default STAR mapping parameters used in the pipeline.
For reference this is the default that is used:
Short Reads: `--outFilterScoreMinOverLread 0 --outFilterMatchNminOverLread 0 --outFilterMultimapScoreRange 0 --clip3pAdapterSeq AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --seedSearchStartLmax 50 --outFilterMatchNmin 25 --limitOutSJcollapsed 2000000`
Long Reads: Same as Short Reads + `--seedPerReadNmax 10000`
This applies to fastqs provided in the Reads user input
Do NOT set any non-mapping related params like `--genomeDir`, `--outSAMtype`, `--outSAMunmapped`, `--readFilesIn`, `--runThreadN`, etc.
We use STAR version 2.7.10b
example: "--alignIntronMax 6000 --outFilterScoreMinOverLread 0.1 --limitOutSJcollapsed 2000000"
info:
config_key: Custom_STAR_Params
- name: "--custom_bwa_mem2_params"
type: string
description: |
Modify bwa-mem2 alignment parameters - Set this parameter to fully override bwa-mem2 mapping parameters used in the pipeline
The pipeline does not specify any custom mapping params to bwa-mem2 so program default values are used
This applies to fastqs provided in the Reads_ATAC user input
Do NOT set any non-mapping related params like `-C`, `-t`, etc.
We use bwa-mem2 version 2.2.1
example: "-k 16 -w 200 -r"
info:
config_key: Custom_bwa_mem2_Params
- name: CWL-runner arguments
arguments:
- name: "--parallel"
type: boolean
description: "Run jobs in parallel."
default: true
- name: "--timestamps"
type: boolean_true
description: "Add timestamps to the errors, warnings, and notifications."
- name: Undocumented arguments
arguments:
- name: --abseq_umi
type: integer
multiple: false
info:
config_key: AbSeq_UMI
- name: --target_analysis
type: boolean
multiple: false
info:
config_key: Target_analysis
- name: --vdj_jgene_evalue
type: double
description: |
e-value threshold for J gene. The e-value threshold for J gene call by IgBlast/PyIR, default is set as 0.001
multiple: false
info:
config_key: VDJ_JGene_Evalue
- name: --vdj_vgene_evalue
type: double
description: |
e-value threshold for V gene. The e-value threshold for V gene call by IgBlast/PyIR, default is set as 0.001
multiple: false
info:
config_key: VDJ_VGene_Evalue
- name: --write_filtered_reads
type: boolean
multiple: false
info:
config_key: Write_Filtered_Reads
resources:
- type: python_script
path: script.py
test_resources:
- type: python_script
path: test.py
- path: ../test_data
- path: ../helpers
requirements:
commands: [ "cwl-runner" ]
engines:
- type: docker
image: bdgenomics/rhapsody:2.2.1
setup:
- type: apt
packages: [procps, git]
- type: python
packages: [cwlref-runner, cwl-runner]
- type: docker
run: |
mkdir /var/bd_rhapsody_cwl && \
cd /var/bd_rhapsody_cwl && \
git clone https://bitbucket.org/CRSwDev/cwl.git . && \
git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de
- type: docker
run:
- VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)
- 'echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt'
test_setup:
- type: python
packages: [biopython, gffutils]
runners:
- type: executable
- type: nextflow

View File

@@ -0,0 +1,167 @@
```bash
cwl-runner src/bd_rhapsody/bd_rhapsody_sequence_analysis/rhapsody_pipeline_2.2.1_nodocker.cwl --help
```
usage: src/bd_rhapsody/bd_rhapsody_sequence_analysis/rhapsody_pipeline_2.2.1_nodocker.cwl
[-h] [--AbSeq_Reference ABSEQ_REFERENCE] [--AbSeq_UMI ABSEQ_UMI]
[--Cell_Calling_ATAC_Algorithm CELL_CALLING_ATAC_ALGORITHM]
[--Cell_Calling_Bioproduct_Algorithm CELL_CALLING_BIOPRODUCT_ALGORITHM]
[--Cell_Calling_Data CELL_CALLING_DATA]
[--Custom_STAR_Params CUSTOM_STAR_PARAMS]
[--Custom_bwa_mem2_Params CUSTOM_BWA_MEM2_PARAMS]
[--Exact_Cell_Count EXACT_CELL_COUNT] [--Exclude_Intronic_Reads]
[--Expected_Cell_Count EXPECTED_CELL_COUNT] [--Generate_Bam]
[--Long_Reads] [--Maximum_Threads MAXIMUM_THREADS]
[--Predefined_ATAC_Peaks PREDEFINED_ATAC_PEAKS] [--Reads READS]
[--Reads_ATAC READS_ATAC] [--Reference_Archive REFERENCE_ARCHIVE]
[--Run_Name RUN_NAME] [--Sample_Tags_Version SAMPLE_TAGS_VERSION]
[--Supplemental_Reference SUPPLEMENTAL_REFERENCE]
[--Tag_Names TAG_NAMES] [--Target_analysis]
[--Targeted_Reference TARGETED_REFERENCE]
[--VDJ_JGene_Evalue VDJ_JGENE_EVALUE]
[--VDJ_VGene_Evalue VDJ_VGENE_EVALUE] [--VDJ_Version VDJ_VERSION]
[--Write_Filtered_Reads]
[job_order]
The BD Rhapsody™ assays are used to create sequencing libraries from single
cell transcriptomes. After sequencing, the analysis pipeline takes the FASTQ
files and a reference file for gene alignment. The pipeline generates
molecular counts per cell, read counts per cell, metrics, and an alignment
file.
positional arguments:
job_order Job input json file
options:
-h, --help show this help message and exit
--AbSeq_Reference ABSEQ_REFERENCE
AbSeq Reference
--AbSeq_UMI ABSEQ_UMI
--Cell_Calling_ATAC_Algorithm CELL_CALLING_ATAC_ALGORITHM
Specify the ATAC algorithm to be used for ATAC
putative cell calling. The Basic algorithm is the
default.
--Cell_Calling_Bioproduct_Algorithm CELL_CALLING_BIOPRODUCT_ALGORITHM
Specify the bioproduct algorithm to be used for
mRNA/AbSeq putative cell calling. The Basic algorithm
is the default.
--Cell_Calling_Data CELL_CALLING_DATA
Specify the data to be used for putative cell calling.
The default data for putative cell calling will be
determined the following way: - If mRNA and ATAC Reads
exist, mRNA_and_ATAC is the default. - If only ATAC
Reads exist, ATAC is the default. - Otherwise, mRNA is
the default.
--Custom_STAR_Params CUSTOM_STAR_PARAMS
Allows you to specify custom STAR aligner mapping
parameters. Only the mapping parameters you provide
here will be used with STAR, meaning that you must
provide the complete list of parameters that you want
to take effect. For reference, the parameters used by
default in the pipeline are: 1. Short Reads:
--outFilterScoreMinOverLread 0
--outFilterMatchNminOverLread 0
--outFilterMultimapScoreRange 0 --clip3pAdapterSeq
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
--seedSearchStartLmax 50 --outFilterMatchNmin 25
--limitOutSJcollapsed 2000000 2. Long Reads: Same
options as short reads + --seedPerReadNmax 10000
Example input: --alignIntronMax 500000
--outFilterScoreMinOverLread 0 --limitOutSJcollapsed
2000000 Important: 1. This applies to fastqs provided
in the Reads user input 2. Please do not specify any
non-mapping related params like: --runThreadN,
--genomeDir --outSAMtype, etc. 3. Please only use
params supported by STAR version 2.7.10b
--Custom_bwa_mem2_Params CUSTOM_BWA_MEM2_PARAMS
Allows you to specify custom bwa-mem2 mapping
parameters. Only the mapping parameters you provide
here will be used with bwa-mem2, meaning that you must
provide the complete list of parameters that you want
to take effect. The pipeline uses program default
mapping parameters. Example input: -k 15 -w 200 -r 2
Important: 1. This applies to fastqs provided in the
Reads_ATAC user input 2. Please do not specify any
non-mapping related params like: -C, -t, etc. 3.
Please only use params supported by bwa-mem2 version
2.2.1
--Exact_Cell_Count EXACT_CELL_COUNT
Set a specific number (>=1) of cells as putative,
based on those with the highest error-corrected read
count
--Exclude_Intronic_Reads
By default, reads aligned to exons and introns are
considered and represented in molecule counts.
Including intronic reads may increase sensitivity,
resulting in an increase in molecule counts and the
number of genes per cell for both cellular and nuclei
samples. Intronic reads may indicate unspliced mRNAs
and are also useful, for example, in the study of
nuclei and RNA velocity. When set to true, intronic
reads will be excluded.
--Expected_Cell_Count EXPECTED_CELL_COUNT
Optional. Guide the basic putative cell calling
algorithm by providing an estimate of the number of
cells expected. Usually this can be the number of
cells loaded into the Rhapsody cartridge. If there are
multiple inflection points on the second derivative
cumulative curve, this will ensure the one selected is
near the expected.
--Generate_Bam Default: false. A Bam read alignment file contains
reads from all the input libraries, but creating it
can consume a lot of compute and disk resources. By
setting this field to true, the Bam file will be
created. This option is shared for both Bioproduct and
ATAC libraries.
--Long_Reads By default, we detect if there are any reads longer
than 650bp and then flag QualCLAlign to use STARlong
instead of STAR. This flag can be explicitly set if it
is known in advance that there are reads longer than
650bp.
--Maximum_Threads MAXIMUM_THREADS
The maximum number of threads to use in the pipeline.
By default, all available cores are used.
--Predefined_ATAC_Peaks PREDEFINED_ATAC_PEAKS
An optional BED file containing pre-established
chromatin accessibility peak regions for generating
the ATAC cell-by-peak matrix. Only applies to ATAC
assays.
--Reads READS FASTQ files from libraries that may include WTA mRNA,
Targeted mRNA, AbSeq, Sample Multiplexing, and related
technologies
--Reads_ATAC READS_ATAC
FASTQ files from libraries generated using the ATAC
assay protocol. Each lane of a library is expected to
have 3 FASTQs - R1, R2 and I1/I2, where the index read
contains the Cell Barcode and UMI sequence. Only
applies to ATAC assays.
--Reference_Archive REFERENCE_ARCHIVE
Reference Files Archive
--Run_Name RUN_NAME This is a name for output files, for example
Experiment1_Metrics_Summary.csv. Default if left empty
is to name run based on a library. Any non-alpha
numeric characters will be changed to a hyphen.
--Sample_Tags_Version SAMPLE_TAGS_VERSION
The sample multiplexing kit version. This option
should only be set for a multiplexed experiment.
--Supplemental_Reference SUPPLEMENTAL_REFERENCE
Supplemental Reference
--Tag_Names TAG_NAMES
Specify the Sample Tag number followed by - (hyphen)
and a sample name to appear in the output files. For
example: 4-Ramos. Should be alpha numeric, with + -
and _ allowed. Any special characters: &, (), [], {},
<>, ?, | will be corrected to underscores.
--Target_analysis
--Targeted_Reference TARGETED_REFERENCE
Targeted Reference
--VDJ_JGene_Evalue VDJ_JGENE_EVALUE
The e-value threshold for J gene call by IgBlast/PyIR,
default is set as 0.001
--VDJ_VGene_Evalue VDJ_VGENE_EVALUE
The e-value threshold for V gene call by IgBlast/PyIR,
default is set as 0.001
--VDJ_Version VDJ_VERSION
The VDJ species and chain types. This option should
only be set for VDJ experiment.
--Write_Filtered_Reads

View File

@@ -0,0 +1,203 @@
#!/usr/bin/env cwl-runner
cwl:tool: rhapsody
# This is a template YML file used to specify the inputs for a BD Rhapsody Sequence Analysis pipeline run.
# See the BD Rhapsody Sequence Analysis Pipeline User Guide for more details. Enter the following information:
## Reads (optional) - Path to your FASTQ.GZ formatted read files from libraries that may include:
# - WTA mRNA
# - Targeted mRNA
# - AbSeq
# - Sample Multiplexing
# - VDJ
# You may specify as many R1/R2 read pairs as you want.
Reads:
- class: File
location: "test/WTALibrary_S1_L001_R1_001.fastq.gz"
- class: File
location: "test/WTALibrary_S1_L001_R2_001.fastq.gz"
## Reads_ATAC (optional) - Path to your FASTQ.GZ formatted read files from ATAC-Seq libraries.
## You may specify as many R1/R2/I2 files as you want.
Reads_ATAC:
- class: File
location: "test/ATACLibrary_S2_L001_R1_001.fastq.gz"
- class: File
location: "test/ATACLibrary_S2_L001_R2_001.fastq.gz"
- class: File
location: "test/ATACLibrary_S2_L001_I2_001.fastq.gz"
## Assay type will be inferred from the provided reference(s)
## Do not provide both Reference_Archive and Targeted_Reference at the same time
##
## Valid reference input combinations:
## WTA Reference_Archive (WTA only)
## WTA Reference_Archive + AbSeq_Reference (WTA + AbSeq)
## WTA Reference_Archive + Supplemental_Reference (WTA + extra transgenes)
## WTA Reference_Archive + AbSeq_Reference + Supplemental_Reference (WTA + AbSeq + extra transgenes)
## WTA+ATAC-Seq Reference_Archive (WTA + ATAC, ATAC only)
## WTA+ATAC-Seq Reference_Archive + Supplemental_Reference (WTA + ATAC + extra transgenes)
## Targeted_Reference (Targeted only)
## Targeted_Reference + AbSeq_Reference (Targeted + AbSeq)
## AbSeq_Reference (AbSeq only)
## See the BD Rhapsody Sequence Analysis Pipeline User Guide for instructions on how to:
## - Obtain a pre-built Rhapsody Reference file
## - Create a custom Rhapsody Reference file
## WTA Reference_Archive (required for WTA mRNA assay) - Path to Rhapsody WTA Reference in the tar.gz format.
##
## --Structure of reference archive--
## BD_Rhapsody_Reference_Files/ # top level folder
## star_index/ # sub-folder containing STAR index
## [files created with STAR --runMode genomeGenerate]
## [GTF for gene-transcript-annotation e.g. "gencode.v43.primary_assembly.annotation.gtf"]
##
## WTA+ATAC-Seq Reference_Archive (required for ATAC-Seq or Multiomic ATAC-Seq (WTA+ATAC-Seq) assays) - Path to Rhapsody WTA+ATAC-Seq Reference in the tar.gz format.
##
## --Structure of reference archive--
## BD_Rhapsody_Reference_Files/ # top level folder
## star_index/ # sub-folder containing STAR index
## [files created with STAR --runMode genomeGenerate]
## [GTF for gene-transcript-annotation e.g. "gencode.v43.primary_assembly.annotation.gtf"]
##
## mitochondrial_contigs.txt # mitochondrial contigs in the reference genome - one contig name per line. e.g. chrMT or chrM, etc.
##
## bwa-mem2_index/ # sub-folder containing bwa-mem2 index
## [files created with bwa-mem2 index]
##
Reference_Archive:
class: File
location: "test/RhapRef_Human_WTA_2023-02.tar.gz"
# location: "test/RhapRef_Human_WTA-ATAC_2023-08.tar.gz"
## Targeted_Reference (required for Targeted mRNA assay) - Path to the targeted reference file in FASTA format.
#Targeted_Reference:
# - class: File
# location: "test/BD_Rhapsody_Immune_Response_Panel_Hs.fasta"
## AbSeq_Reference (optional) - Path to the AbSeq reference file in FASTA format. Only needed if BD AbSeq Ab-Oligos are used.
## For putative cell calling using an AbSeq dataset, please provide an AbSeq reference fasta file as the AbSeq_Reference.
#AbSeq_Reference:
# - class: File
# location: "test/AbSeq_reference.fasta"
## Supplemental_Reference (optional) - Path to the supplemental reference file in FASTA format. Only needed if there are additional transgene sequences to be aligned against in a WTA assay experiment
#Supplemental_Reference:
# - class: File
# location: "test/supplemental_reference.fasta"
####################################
## Putative Cell Calling Settings ##
####################################
## Putative cell calling dataset (optional) - Specify the dataset to be used for putative cell calling: mRNA, AbSeq, ATAC, mRNA_and_ATAC
## For putative cell calling using an AbSeq dataset, please provide an AbSeq_Reference fasta file above.
## For putative cell calling using an ATAC dataset, please provide a WTA+ATAC-Seq Reference_Archive file above.
## The default data for putative cell calling, will be determined the following way:
## If mRNA Reads and ATAC Reads exist:
## Cell_Calling_Data: mRNA_and_ATAC
## If only ATAC Reads exist:
## Cell_Calling_Data: ATAC
## Otherwise:
## Cell_Calling_Data: mRNA
#Cell_Calling_Data: mRNA
## Putative cell calling bioproduct algorithm (optional) - Specify the bioproduct algorithm to be used for putative cell calling: Basic or Refined
## By default, the Basic algorithm will be used for putative cell calling.
#Cell_Calling_Bioproduct_Algorithm: Basic
## Putative cell calling ATAC algorithm (optional) - Specify the ATAC-seq algorithm to be used for putative cell calling: Basic or Refined
## By default, the Basic algorithm will be used for putative cell calling.
#Cell_Calling_ATAC_Algorithm: Basic
## Exact cell count (optional) - Set a specific number (>=1) of cells as putative, based on those with the highest error-corrected read count
#Exact_Cell_Count: 10000
## Expected Cell Count (optional) - Guide the basic putative cell calling algorithm by providing an estimate of the number of cells expected. Usually this can be the number of cells loaded into the Rhapsody cartridge. If there are multiple inflection points on the second derivative cumulative curve, this will ensure the one selected is near the expected.
#Expected_Cell_Count: 20000
####################################
## Intronic Reads Settings ##
####################################
## Exclude_Intronic_Reads (optional)
## By default, the flag is false, and reads aligned to exons and introns are considered and represented in molecule counts. When the flag is set to true, intronic reads will be excluded.
## The value can be true or false.
#Exclude_Intronic_Reads: true
#######################
## Multiplex options ##
#######################
## Sample Tags Version (optional) - If Sample Tag Multiplexing was done, specify the appropriate version: human, mouse, flex, nuclei_includes_mrna, nuclei_atac_only
## If this is an SMK + Nuclei mRNA run or an SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq) run (and not an SMK + ATAC-Seq only run), choose the "nuclei_includes_mrna" option.
## If this is an SMK + ATAC-Seq only run (and not SMK + Multiomic ATAC-Seq (WTA+ATAC-Seq)), choose the "nuclei_atac_only" option.
#Sample_Tags_Version: human
## Tag_Names (optional) - Specify the tag number followed by '-' and the desired sample name to appear in Sample_Tag_Metrics.csv
# Do not use the special characters: &, (), [], {}, <>, ?, |
#Tag_Names: [4-mySample, 9-myOtherSample, 6-alsoThisSample]
################
## VDJ option ##
################
## VDJ Version (optional) - If VDJ was done, specify the appropriate option: human, mouse, humanBCR, humanTCR, mouseBCR, mouseTCR
#VDJ_Version: human
##################
## ATAC options ##
##################
## Predefined ATAC Peaks - An optional BED file containing pre-established chromatin accessibility peak regions for generating the ATAC cell-by-peak matrix.
#Predefined_ATAC_Peaks:
# class: File
# location: "path/predefined_peaks.bed"
########################
## Additional Options ##
########################
## Run Name (optional)- Specify a run name to use as the output file base name. Use only letters, numbers, or hyphens. Do not use special characters or spaces.
#Run_Name: my-experiment
## Generate Bam (optional, default: false) - Specify whether to create the BAM file output
#Generate_Bam: true
## Maximum_Threads (integer, optional, default: [use all cores of CPU]) - Set the maximum number of threads to use in the read processing steps of the pipeline: QualCLAlign, AlignmentAnalysis, VDJ assembly
#Maximum_Threads: 16
## Use STARlong (optional, default: "auto" - i.e. autodetects based on read lengths) - Specify if the STARlong aligner should be used instead of STAR. Set to true if the reads are longer than 650bp.
## The value can be true or false.
#Long_Reads: true
########################
## Advanced Options ##
########################
## NOTE: Only change these if you are really sure about what you are doing
## Modify STAR alignment parameters - Set this parameter to fully override default STAR mapping parameters used in the pipeline.
## For reference this is the default that is used:
## Short Reads: --outFilterScoreMinOverLread 0 --outFilterMatchNminOverLread 0 --outFilterMultimapScoreRange 0 --clip3pAdapterSeq AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --seedSearchStartLmax 50 --outFilterMatchNmin 25 --limitOutSJcollapsed 2000000
## Long Reads: Same as Short Reads + --seedPerReadNmax 10000
## This applies to fastqs provided in the Reads user input
## Do NOT set any non-mapping related params like --genomeDir, --outSAMtype, --outSAMunmapped, --readFilesIn, --runThreadN, etc.
## We use STAR version 2.7.10b
#Custom_STAR_Params: --alignIntronMax 6000 --outFilterScoreMinOverLread 0.1 --limitOutSJcollapsed 2000000
## Modify bwa-mem2 alignment parameters - Set this parameter to fully override bwa-mem2 mapping parameters used in the pipeline
## The pipeline does not specify any custom mapping params to bwa-mem2 so program default values are used
## This applies to fastqs provided in the Reads_ATAC user input
## Do NOT set any non-mapping related params like -C, -t, etc.
## We use bwa-mem2 version 2.2.1
#Custom_bwa_mem2_Params: -k 16 -w 200 -r

View File

@@ -0,0 +1,243 @@
import os
import re
import subprocess
import tempfile
from typing import Any
import yaml
import shutil
import glob
## VIASH START
# Example values for running this script by hand during development.
# NOTE(review): the surrounding VIASH START/END markers suggest Viash overwrites
# this block at build time -- confirm before relying on these values.
par = {
    'reads': [
        'resources_test/bdrhap_5kjrt/raw/12ABC_S1_L432_R1_001_subset.fastq.gz',
        'resources_test/bdrhap_5kjrt/raw/12ABC_S1_L432_R2_001_subset.fastq.gz'
    ],
    'reads_atac': None,
    'reference_archive': "resources_test/reference_gencodev41_chr1/reference_bd_rhapsody.tar.gz",
    'targeted_reference': [],
    'abseq_reference': [],
    'supplemental_reference': [],
    # Every function below reads par["output_dir"]; the key was previously
    # spelled 'output', which made a manual run fail with a KeyError.
    'output_dir': 'output_dir',
    'cell_calling_data': None,
    'cell_calling_bioproduct_algorithm': None,
    'cell_calling_atac_algorithm': None,
    'exact_cell_count': None,
    'expected_cell_count': None,
    'exclude_intronic_reads': None,
    'sample_tags_version': None,
    'tag_names': [],
    'vdj_version': None,
    'predefined_atac_peaks': None,
    'run_name': "sample",
    'generate_bam': None,
    'alignment_star_params': None,
    'alignment_bwa_mem2_params': None,
    'parallel': True,
    'timestamps': False,
    'dryrun': False
}
meta = {
    # Paths use the component name "bd_rhapsody" (previously misspelled "bd_rhaspody").
    'config': "target/nextflow/bd_rhapsody/bd_rhapsody_sequence_analysis/.config.vsh.yaml",
    'resources_dir': os.path.abspath('src/bd_rhapsody/bd_rhapsody_sequence_analysis'),
    'temp_dir': os.getenv("VIASH_TEMP"),
    'memory_mb': None,
    'cpus': None
}
## VIASH END
def clean_arg(argument):
    """Annotate an argument definition with its dash-less name.

    Args:
        argument: Mapping with at least a "name" entry (e.g. "--reads").

    Returns:
        The same mapping object, with a "clean_name" entry added that holds
        the name without any leading "-" characters.
    """
    bare_name = argument["name"].lstrip("-")
    argument.update(clean_name=bare_name)
    return argument
def read_config(path: str) -> dict[str, Any]:
    """Load the component config and flatten its argument groups.

    Args:
        path: Location of the YAML config file.

    Returns:
        The parsed config, extended with an "arguments" entry: a flat list of
        every argument from every group in "argument_groups", each passed
        through `clean_arg`.
    """
    with open(path, 'r') as handle:
        config = yaml.safe_load(handle)

    flattened = []
    for group in config["argument_groups"]:
        for group_arg in group["arguments"]:
            flattened.append(clean_arg(group_arg))
    config["arguments"] = flattened
    return config
def strip_margin(text: str) -> str:
    """Remove the leading `|` margin marker from every line of `text`.

    On each line, optional spaces/tabs followed by the first `|` are dropped.
    Only a marker at the start of a line is treated as margin, so `|`
    characters that occur later in a line (for example inside an interpolated
    value) are left untouched.

    Args:
        text: Multi-line string whose lines start with `<whitespace>|`.

    Returns:
        The text with the margin prefix removed from each line.
    """
    # Anchor at line start (MULTILINE) so pipes inside values are not eaten.
    return re.sub(r'^[ \t]*\|', '', text, flags=re.MULTILINE)
def process_params(par: dict[str, Any], config, temp_dir: str) -> dict[str, Any]:
    """Validate and normalise the parameters before running the pipeline.

    Args:
        par: Parameter values keyed by clean argument name. Modified in place.
        config: Parsed component config with a flat "arguments" list.
        temp_dir: Directory used for the default output location.

    Returns:
        The same `par` mapping, with a default output directory filled in,
        the run name sanitised and all file arguments made absolute.

    Raises:
        AssertionError: If neither `reads` nor `reads_atac` was provided.
    """
    # check input parameters
    assert par["reads"] or par["reads_atac"], "Pass at least one set of inputs to --reads or --reads_atac."

    # output to temp dir if output_dir was not passed
    if not par["output_dir"]:
        par["output_dir"] = os.path.join(temp_dir, "output")

    # checking sample prefix
    # Use re.search (not re.match) so invalid characters anywhere in the name
    # are detected, and allow hyphens since they are explicitly permitted.
    if par["run_name"] and re.search(r"[^A-Za-z0-9\-]", par["run_name"]):
        print("--run_name should only consist of letters, numbers or hyphens. Replacing all '[^A-Za-z0-9-]' with '-'.", flush=True)
        par["run_name"] = re.sub(r"[^A-Za-z0-9\-]", "-", par["run_name"])

    # make paths absolute
    for argument in config["arguments"]:
        arg_clean_name = argument["clean_name"]
        if not par[arg_clean_name] or argument["type"] != "file":
            continue
        par_value = par[arg_clean_name]
        if isinstance(par_value, list):
            par_value_absolute = list(map(os.path.abspath, par_value))
        else:
            par_value_absolute = os.path.abspath(par_value)
        par[arg_clean_name] = par_value_absolute

    return par
def generate_config(par: dict[str, Any], config) -> str:
    """Render the YAML job file that is handed to cwl-runner.

    Every argument that has a truthy value and an `info.config_key` is
    written under that key. File arguments are written as CWL `File` objects
    (a list of them when the value is a list); all other values are written
    with their plain string representation.

    Args:
        par: Parameter values keyed by clean argument name.
        config: Parsed component config with a flat "arguments" list.

    Returns:
        The content of the job file as a single string.
    """
    # Local import: json is only needed here to quote/escape file paths.
    import json

    def _quoted(path) -> str:
        # A JSON string is also a valid YAML double-quoted scalar, so quotes
        # and backslashes in paths cannot break the generated document.
        return json.dumps(str(path), ensure_ascii=False)

    lines = [
        "#!/usr/bin/env cwl-runner",
        "",
        "cwl:tool: rhapsody",
    ]

    for argument in config["arguments"]:
        arg_par_value = par[argument["clean_name"]]
        arg_info = argument.get("info") or {}  # Note: .info might be None
        config_key = arg_info.get("config_key")
        if not (arg_par_value and config_key):
            continue

        if argument["type"] != "file":
            lines.append(f"{config_key}: {arg_par_value}")
            continue

        lines.append(f"{config_key}:")
        if isinstance(arg_par_value, list):
            for file in arg_par_value:
                # 'location' must be nested under the list item it belongs to
                lines.append("  - class: File")
                lines.append(f"    location: {_quoted(file)}")
        else:
            lines.append("  class: File")
            lines.append(f"  location: {_quoted(arg_par_value)}")

    return "\n".join(lines) + "\n"
def generate_config_file(par: dict[str, Any], config: dict[str, Any], temp_dir: str) -> str:
    """Write the rendered job file to `<temp_dir>/config.yml`.

    Args:
        par: Parameter values keyed by clean argument name.
        config: Parsed component config with a flat "arguments" list.
        temp_dir: Directory in which the job file is created.

    Returns:
        Path of the written job file.
    """
    rendered = generate_config(par, config)
    target_path = os.path.join(temp_dir, "config.yml")
    with open(target_path, "w") as handle:
        handle.write(rendered)
    return target_path
def generate_cwl_file(meta: dict[str, Any], dir: str) -> str:
    """Return the pipeline CWL file to run, patched with resource limits if set.

    When neither `meta["memory_mb"]` nor `meta["cpus"]` is set, the pipeline
    file at its fixed location is used as is. Otherwise a copy with rewritten
    "ramMin" / "coresMin" entries is written to `<dir>/pipeline.cwl`.

    Args:
        meta: Mapping providing "memory_mb" and "cpus".
        dir: Directory that receives the patched copy.

    Returns:
        Absolute path of the CWL file to pass to cwl-runner.
    """
    # create cwl file (if need be)
    # orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_pipeline_2.2.1_nodocker.cwl")
    orig_cwl_file="/var/bd_rhapsody_cwl/v2.2.1/rhapsody_pipeline_2.2.1.cwl"

    requested_memory = meta["memory_mb"]
    requested_cpus = meta["cpus"]

    # Nothing to inject: run the unmodified pipeline definition.
    if not (requested_memory or requested_cpus):
        return os.path.abspath(orig_cwl_file)

    with open(orig_cwl_file, 'r') as source:
        cwl_data = source.read()

    # Inject computational requirements into pipeline
    if requested_memory:
        memory = int(requested_memory) - 2000 # keep 2gb for OS
        cwl_data = re.sub('"ramMin": [^\n]*[^,](,?)\n', f'"ramMin": {memory}\\1\n', cwl_data)
    if requested_cpus:
        cwl_data = re.sub('"coresMin": [^\n]*[^,](,?)\n', f'"coresMin": {requested_cpus}\\1\n', cwl_data)

    # Write the patched copy next to the other temporary files
    cwl_file = os.path.join(dir, "pipeline.cwl")
    with open(cwl_file, 'w') as target:
        target.write(cwl_data)

    return os.path.abspath(cwl_file)
def copy_outputs(par: dict[str, Any], config: dict[str, Any]):
    """Copy pipeline results from the output directory to the requested paths.

    For every output file argument that has a value and an `info.template`,
    the template is turned into a glob pattern ("[sample_name]" becomes the
    run name; "(assay)" and "[number]" become "*") and matched against
    `par["output_dir"]`.

    Args:
        par: Parameter values keyed by clean argument name.
        config: Parsed component config with a flat "arguments" list.

    Raises:
        ValueError: If a required output is missing, or if several files match
            an argument that accepts only a single file.
    """
    for arg in config["arguments"]:
        par_value = par[arg["clean_name"]]
        if not par_value or arg["type"] != "file" or arg.get("direction") != "output":
            continue

        # example template: '[sample_name]_(assay)_cell_type_experimental.csv'
        template = (arg.get("info") or {}).get("template")  # Note: .info might be None
        if not template:
            continue

        template_glob = template\
            .replace("[sample_name]", par["run_name"])\
            .replace("(assay)", "*")\
            .replace("[number]", "*")
        # sort so the index substituted for '*' is reproducible between runs
        files = sorted(glob.glob(os.path.join(par["output_dir"], template_glob)))

        if not files:
            if arg.get("required"):
                raise ValueError(f"Expected output file '{template_glob}' not found.")
            # optional output that the pipeline did not produce: nothing to copy
            continue

        if not arg.get("multiple"):
            if len(files) > 1:
                raise ValueError(f"Expected single output file '{template_glob}', but found multiple.")
            shutil.copy(files[0], par_value)
        else:
            # replace '*' in par_value with index
            for i, file in enumerate(files):
                shutil.copy(file, par_value.replace("*", str(i)))
def main(par: dict[str, Any], meta: dict[str, Any], temp_dir: str):
    """Run the sequence analysis pipeline through cwl-runner and collect outputs.

    Args:
        par: Parameter values keyed by clean argument name.
        meta: Component metadata; "config" is read here, the remainder is
            forwarded to `generate_cwl_file`.
        temp_dir: Working directory for generated files; also exported to the
            child process as TMPDIR.

    Raises:
        subprocess.CalledProcessError: If cwl-runner exits with a non-zero code.
    """
    config = read_config(meta["config"])

    # Preprocess params
    par = process_params(par, config, temp_dir)

    # Optional cwl-runner switches, in the order they appear on the command line
    switches = [
        flag
        for flag, enabled in (("--parallel", par["parallel"]), ("--timestamps", par["timestamps"]))
        if enabled
    ]

    # Create cwl file (if need be), then the params file
    cwl_file = generate_cwl_file(meta, temp_dir)
    config_file = generate_config_file(par, config, temp_dir)

    cmd = [
        "cwl-runner",
        "--no-container",
        "--preserve-entire-environment",
        "--outdir", par["output_dir"],
        *switches,
        cwl_file,
        config_file,
    ]

    # keep environment variables but set TMPDIR to temp_dir
    env = {**os.environ, "TMPDIR": temp_dir}

    # Create output dir if not exists
    output_dir = par["output_dir"]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Run command from the directory that holds the params file
    print("> " + ' '.join(cmd), flush=True)
    subprocess.run(cmd, cwd=os.path.dirname(config_file), env=env, check=True)

    # Copy outputs
    copy_outputs(par, config)
# Script entry point: run the pipeline inside a scratch directory created under
# meta["temp_dir"]; the directory is removed again when the `with` block exits.
if __name__ == "__main__":
    with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody-", dir=meta["temp_dir"]) as temp_dir:
        main(par, meta, temp_dir)

View File

@@ -0,0 +1,494 @@
import subprocess
import gzip
from pathlib import Path
from typing import Tuple
import numpy as np
import random
import mudata as md
## VIASH START
# Example metadata for running this test by hand.
# NOTE(review): the VIASH START/END markers suggest Viash overwrites this block
# when the test is executed through Viash -- confirm.
meta = {
    "name": "bd_rhapsody_sequence_analysis",
    "executable": "target/docker/bd_rhapsody/bd_rhapsody_sequence_analysis/bd_rhapsody_sequence_analysis",
    "resources_dir": "src/bd_rhapsody",
    "cpus": 8,
    "memory_mb": 4096,
}
## VIASH END
import sys
# Make the `helpers` package inside the resources directory importable.
sys.path.append(meta["resources_dir"])
from helpers.rhapsody_cell_label import index_to_sequence
# Convert to Path objects so the `/` operator can be used to build paths below.
meta["executable"] = Path(meta["executable"])
meta["resources_dir"] = Path(meta["resources_dir"])
#########################################################################################
# Generate index
print("> Generate index", flush=True)
# cwl_file = meta["resources_dir"] / "bd_rhapsody_make_reference.cwl"
cwl_file = "/var/bd_rhapsody_cwl/v2.2.1/Extra_Utilities/make_rhap_reference_2.2.1.cwl"
reference_small_gtf = meta["resources_dir"] / "test_data" / "reference_small.gtf"
reference_small_fa = meta["resources_dir"] / "test_data" / "reference_small.fa"
bdabseq_panel_fa = meta["resources_dir"] / "test_data" / "BDAbSeq_ImmuneDiscoveryPanel.fasta"
sampletagsequences_fa = meta["resources_dir"] / "test_data" / "SampleTagSequences_HomoSapiens_ver1.fasta"
config_file = Path("reference_config.yml")
reference_file = Path("Rhap_reference.tar.gz")

# Build the reference archive in the current working directory.
# check=True makes the test stop right here when the reference build fails,
# instead of continuing and failing later with a less obvious error.
subprocess.run(
    [
        "cwl-runner",
        "--no-container",
        "--preserve-entire-environment",
        "--outdir",
        ".",
        str(cwl_file),
        "--Genome_fasta",
        str(reference_small_fa),
        "--Gtf",
        str(reference_small_gtf),
        "--Extra_STAR_params",
        "--genomeSAindexNbases 4"
    ],
    check=True,
)
#########################################################################################
# Load reference in memory
from Bio import SeqIO
import gffutils


def _read_fasta_records(fasta_path):
    """Parse a FASTA file into a dict as built by SeqIO.to_dict."""
    with open(str(fasta_path), "r") as fasta_handle:
        return SeqIO.to_dict(SeqIO.parse(fasta_handle, "fasta"))


# Load FASTA sequences (genome, AbSeq panel, sample tags)
reference_fasta_dict = _read_fasta_records(reference_small_fa)
bdabseq_panel_fasta_dict = _read_fasta_records(bdabseq_panel_fa)
sampletagsequences_fasta_dict = _read_fasta_records(sampletagsequences_fa)

# Create an in-memory annotation database from the GTF
_gtf_db_options = dict(
    dbfn=":memory:",
    force=True,
    keep_order=True,
    merge_strategy="merge",
    sort_attribute_values=True,
    disable_infer_transcripts=True,
    disable_infer_genes=True,
)
reference_gtf_db = gffutils.create_db(str(reference_small_gtf), **_gtf_db_options)
#############################################
# TODO: move helper functions to separate helper file
def generate_bd_read_metadata(
    instrument_id: str = "A00226",
    run_id: str = "970",
    flowcell_id: str = "H5FGVMXY",
    lane: int = 1,
    tile: int = 1101,
    x: int = 1000,
    y: int = 1000,
    illumina_flag: str = "1:N:0",
    sample_id: str = "CAGAGAGG",
) -> str:
    """
    Generate a FASTQ metadata line for a BD Rhapsody FASTQ file.

    Args:
        instrument_id: The instrument ID.
        run_id: The run ID.
        flowcell_id: The flowcell ID.
        lane: The lane number.
        tile: The tile number. Between 1101 and 1112 in the used example data.
        x: The x-coordinate. Between 1000 and 32967 in the used example data.
            Converted with int(), so integral floats (e.g. numpy results) are accepted.
        y: The y-coordinate. Between 1000 and 37059 in the used example data.
            Converted with int(), so integral floats (e.g. numpy results) are accepted.
        illumina_flag: The Illumina flag. Either 1:N:0 or 2:N:0 in the used example data.
        sample_id: The sample ID.

    Returns:
        The header line, starting with "@".
    """
    # Callers in this file compute x/y with numpy floats; without the int()
    # conversion the header would contain values such as "1009.0".
    x_coord = int(x)
    y_coord = int(y)
    # format: @A00226:970:H5FGVDMXY:1:1101:2645:1000 2:N:0:CAGAGAGG
    return f"@{instrument_id}:{run_id}:{flowcell_id}:{lane}:{tile}:{x_coord}:{y_coord} {illumina_flag}:{sample_id}"
def generate_bd_wta_transcript(
    transcript_length: int = 42,
) -> str:
    """
    Build a WTA transcript sequence from a randomly chosen gene.

    The exons of the gene are concatenated in start order, using the
    module-level GTF database and FASTA dictionary, until the target length is
    reached. The target length is `transcript_length`, or the total exon
    length of the gene when that is smaller.

    Args:
        transcript_length: Desired (maximum) length of the transcript.

    Returns:
        The transcript sequence, padded with "N" if the concatenated exon
        sequence is shorter than the target length.
    """
    # Randomly select a gene
    all_genes = list(reference_gtf_db.features_of_type("gene"))
    picked_gene = random.choice(all_genes)

    # Find all exons within the gene
    exon_features = list(reference_gtf_db.children(picked_gene, featuretype="exon", order_by="start"))

    # Never ask for more bases than the exons of this gene span
    exon_span = sum(feature.end - feature.start + 1 for feature in exon_features)
    target_length = min(exon_span, transcript_length)

    # Collect exon sequences until enough bases were gathered
    pieces = []
    gathered = 0
    for feature in exon_features:
        chunk = str(reference_fasta_dict[feature.seqid].seq[feature.start - 1 : feature.end])
        pieces.append(chunk)
        gathered += len(chunk)
        if gathered >= target_length:
            break

    # Trim any overshoot, then pad with N if need be
    transcript = "".join(pieces)[:target_length]
    return transcript.ljust(target_length, "N")
def generate_bd_wta_read(
    cell_index: int = 0,
    bead_version: str = "EnhV2",
    umi_length: int = 14,
    transcript_length: int = 42,
) -> Tuple[str, str]:
    """
    Generate a BD Rhapsody WTA read pair for a given cell index.

    Args:
        cell_index: The cell index to generate reads for.
        bead_version: The bead version to use for generating the cell label.
        umi_length: The length of the UMI to generate.
        transcript_length: The length of the transcript to generate

    Returns:
        A tuple of two strings, the first string being the R1 read and the second string being the R2 read.

    More info:
        See structure of reads:
        - https://bd-rhapsody-bioinfo-docs.genomics.bd.com/steps/top_steps.html
        - https://bd-rhapsody-bioinfo-docs.genomics.bd.com/steps/steps_cell_label.html
        - https://scomix.bd.com/hc/en-us/articles/360057714812-All-FAQ

        R1 is Cell Label + UMI + PolyT -> 60 bp
          actually, CLS1 + "GTGA" + CLS2 + "GACA" + CLS3 + UMI
        R2 is the actual read -> 42 bp

        Example R1
          CLS1       Link CLS2      Link CLS3       UMI
          AAAATCCTGT GTGA AACCAAAGT GACA GATAGAGGAG CGCATGTTTATAAC
    """
    # generate metadata
    # Integer arithmetic keeps x/y integral, so the read header does not end
    # up with float coordinates such as "1009.0".
    per_row = (32967 - 1000) // 9
    per_col = (37059 - 1000) // 9
    assert cell_index >= 0 and cell_index < per_row * per_col, f"cell_index must be between 0 and {per_row} * {per_col}"
    x = 1000 + (cell_index % per_row) * 9
    y = 1000 + (cell_index // per_row) * 9
    instrument_id = "A00226"
    run_id = "970"
    flowcell_id = "H5FGVMXY"
    meta_r1 = generate_bd_read_metadata(instrument_id=instrument_id, run_id=run_id, flowcell_id=flowcell_id, x=x, y=y, illumina_flag="1:N:0")
    meta_r2 = generate_bd_read_metadata(instrument_id=instrument_id, run_id=run_id, flowcell_id=flowcell_id, x=x, y=y, illumina_flag="2:N:0")

    # generate r1 (cls1 + link + cls2 + link + cls3 + umi)
    assert cell_index >= 0 and cell_index < 384 * 384 * 384
    cell_label = index_to_sequence(cell_index + 1, bead_version=bead_version)
    # sample random umi
    umi = "".join(random.choices("ACGT", k=umi_length))
    quality_r1 = "I" * (len(cell_label) + len(umi))
    r1 = f"{meta_r1}\n{cell_label}{umi}\n+\n{quality_r1}\n"

    # generate r2 by extracting sequence from fasta and gtf
    wta_transcript = generate_bd_wta_transcript(transcript_length=transcript_length)
    # The transcript can be shorter than transcript_length (short genes); the
    # quality string must have the same length as the sequence it describes.
    quality_r2 = "I" * len(wta_transcript)
    r2 = f"{meta_r2}\n{wta_transcript}\n+\n{quality_r2}\n"

    return r1, r2
def generate_bd_wta_fastq_files(
    num_cells: int = 100,
    num_reads_per_cell: int = 1000,
) -> Tuple[str, str]:
    """
    Produce the content of paired BD Rhapsody WTA FASTQ files.

    For each cell index in `range(num_cells)`, `num_reads_per_cell` read pairs
    are generated with `generate_bd_wta_read` and appended in order.

    Args:
        num_cells: The number of cells to generate
        num_reads_per_cell: The number of reads to generate per cell

    Returns:
        A tuple of two strings: all R1 records, followed by all R2 records.
    """
    r1_records = []
    r2_records = []
    for cell_index in range(num_cells):
        for _ in range(num_reads_per_cell):
            first_read, second_read = generate_bd_wta_read(cell_index)
            r1_records.append(first_read)
            r2_records.append(second_read)
    return "".join(r1_records), "".join(r2_records)
def generate_bd_abc_read(
    cell_index: int = 0,
    bead_version: str = "EnhV2",
    umi_length: int = 14,
    transcript_length: int = 72,
) -> Tuple[str, str]:
    """
    Build one BD Rhapsody ABC (AbSeq) read pair for the given cell.

    Args:
        cell_index: Zero-based index of the cell the read belongs to.
        bead_version: Bead version passed to index_to_sequence for the cell label.
        umi_length: Number of random bases in the UMI.
        transcript_length: Total length of the R2 sequence.

    Returns:
        A tuple (r1, r2) of FASTQ records: R1 is cell label + UMI, R2 is
        "N" + random padding + AbSeq barcode prefix + poly-A suffix.
    """
    # place every cell on its own (x, y) position, 9 units apart
    # NOTE(review): np.floor returns floats, so x and y are floats as well;
    # confirm generate_bd_read_metadata renders them as intended.
    per_row = np.floor((32967 - 1000) / 9)
    per_col = np.floor((37059 - 1000) / 9)
    assert 0 <= cell_index < per_row * per_col, f"cell_index must be between 0 and {per_row} * {per_col}"
    x = 1000 + (cell_index % per_row) * 9
    y = 1000 + (cell_index // per_row) * 9

    # both mates share everything except the illumina flag
    shared_meta = dict(
        instrument_id="A01604",
        run_id="19",
        flowcell_id="HMKLYDRXY",
        x=x,
        y=y,
    )
    meta_r1 = generate_bd_read_metadata(**shared_meta, illumina_flag="1:N:0")
    meta_r2 = generate_bd_read_metadata(**shared_meta, illumina_flag="2:N:0")

    # R1: cell label (cls1 + link + cls2 + link + cls3) followed by a random UMI
    assert 0 <= cell_index < 384 * 384 * 384
    cell_label = index_to_sequence(cell_index + 1, bead_version=bead_version)
    umi = "".join(random.choices("ACGT", k=umi_length))
    quality_r1 = "I" * (len(cell_label) + len(umi))
    r1 = f"{meta_r1}\n{cell_label}{umi}\n+\n{quality_r1}\n"

    # R2: pick a random entry of the AbSeq panel and wrap it so that the
    # complete sequence is exactly transcript_length long
    panel_records = list(bdabseq_panel_fasta_dict.values())
    abseq_seq = str(random.choice(panel_records).seq)
    abc_suffix = "AAAAAAAAAAAAAAAAAAAAAAA"
    # room left for random padding + barcode once the leading "N" and the suffix are reserved
    payload_budget = transcript_length - len(abc_suffix) - 1
    abc_data = abseq_seq[:payload_budget]
    padding = "".join(random.choices("ACGT", k=payload_budget - len(abc_data)))
    abc_transcript = "N" + padding + abc_data + abc_suffix
    # the leading "N" gets the lowest quality symbol, all other bases "I"
    quality_r2 = "#" + "I" * (len(abc_transcript) - 1)
    r2 = f"{meta_r2}\n{abc_transcript}\n+\n{quality_r2}\n"

    return r1, r2
def generate_bd_abc_fastq_files(
    num_cells: int = 100,
    num_reads_per_cell: int = 1000,
) -> Tuple[str, str]:
    """
    Generate BD Rhapsody ABC FASTQ content for a given number of cells and reads per cell.

    Args:
        num_cells: The number of cells to generate
        num_reads_per_cell: The number of reads to generate per cell

    Returns:
        A tuple of two strings, the first string being the R1 reads and the second string being the R2 reads.
    """
    # Collect the individual records and join them once at the end: growing a
    # string with `+=` inside the loop may re-copy the whole buffer on every
    # iteration, which becomes quadratic for num_cells * num_reads_per_cell records.
    r1_records = []
    r2_records = []
    for cell_index in range(num_cells):
        for _ in range(num_reads_per_cell):
            r1, r2 = generate_bd_abc_read(cell_index)
            r1_records.append(r1)
            r2_records.append(r2)
    return "".join(r1_records), "".join(r2_records)
def generate_bd_smk_read(
    cell_index: int = 0,
    bead_version: str = "EnhV2",
    umi_length: int = 14,
    transcript_length: int = 72,
    num_sample_tags: int = 3,
) -> Tuple[str, str]:
    """
    Generate a BD Rhapsody SMK (sample multiplexing) read pair for a given cell index.

    Args:
        cell_index: The cell index to generate reads for.
        bead_version: The bead version to use for generating the cell label.
        umi_length: The length of the UMI to generate.
        transcript_length: The length of the R2 sequence to generate.
        num_sample_tags: The number of sample tags to use. Cells are assigned
            to tags round-robin (cell_index % num_sample_tags).

    Returns:
        A tuple of two strings, the first string being the R1 read and the second string being the R2 read.

    Raises:
        AssertionError: if cell_index is out of range or num_sample_tags is not
            between 1 and the number of available sample tag sequences.
    """
    # validate up front: 0 would raise ZeroDivisionError below, a negative value
    # would silently index from the end, and a too large value an IndexError
    sampletag_records = list(sampletagsequences_fasta_dict.values())
    assert 1 <= num_sample_tags <= len(sampletag_records), \
        f"num_sample_tags must be between 1 and {len(sampletag_records)}"

    # generate metadata
    # NOTE(review): np.floor returns floats, so x and y are floats as well;
    # confirm generate_bd_read_metadata renders them as intended.
    per_row = np.floor((32967 - 1000) / 9)
    per_col = np.floor((37059 - 1000) / 9)
    assert cell_index >= 0 and cell_index < per_row * per_col, f"cell_index must be between 0 and {per_row} * {per_col}"
    x = 1000 + (cell_index % per_row) * 9
    y = 1000 + (cell_index // per_row) * 9
    instrument_id = "A00226"
    run_id = "970"
    flowcell_id = "H5FGVDMXY"
    meta_r1 = generate_bd_read_metadata(instrument_id=instrument_id, run_id=run_id, flowcell_id=flowcell_id, x=x, y=y, illumina_flag="1:N:0")
    meta_r2 = generate_bd_read_metadata(instrument_id=instrument_id, run_id=run_id, flowcell_id=flowcell_id, x=x, y=y, illumina_flag="2:N:0")

    # generate r1 (cls1 + link + cls2 + link + cls3 + umi)
    assert cell_index >= 0 and cell_index < 384 * 384 * 384
    cell_label = index_to_sequence(cell_index + 1, bead_version=bead_version)
    # sample random umi
    umi = "".join(random.choices("ACGT", k=umi_length))
    quality_r1 = "I" * (len(cell_label) + len(umi))
    r1 = f"{meta_r1}\n{cell_label}{umi}\n+\n{quality_r1}\n"

    # generate r2 by selecting sample tag number (cell_index % num_sample_tags)
    sampletag_index = cell_index % num_sample_tags
    sampletag_seq = str(sampletag_records[sampletag_index].seq)
    smk_data = sampletag_seq[:transcript_length]
    # pad short tags with "A" up to transcript_length; padded bases get quality "#"
    smk_suffix = "A" * (transcript_length - len(smk_data))
    quality_r2 = "I" * len(smk_data) + "#" * len(smk_suffix)
    r2 = f"{meta_r2}\n{smk_data}{smk_suffix}\n+\n{quality_r2}\n"

    return r1, r2
def generate_bd_smk_fastq_files(
    num_cells: int = 100,
    num_reads_per_cell: int = 1000,
    num_sample_tags: int = 3,
) -> Tuple[str, str]:
    """
    Generate BD Rhapsody SMK FASTQ content for a given number of cells and reads per cell.

    Args:
        num_cells: The number of cells to generate
        num_reads_per_cell: The number of reads to generate per cell
        num_sample_tags: The number of sample tags to use

    Returns:
        A tuple of two strings, the first string being the R1 reads and the second string being the R2 reads.
    """
    # Collect the individual records and join them once at the end: growing a
    # string with `+=` inside the loop may re-copy the whole buffer on every
    # iteration, which becomes quadratic for num_cells * num_reads_per_cell records.
    r1_records = []
    r2_records = []
    for cell_index in range(num_cells):
        for _ in range(num_reads_per_cell):
            r1, r2 = generate_bd_smk_read(cell_index, num_sample_tags=num_sample_tags)
            r1_records.append(r1)
            r2_records.append(r2)
    return "".join(r1_records), "".join(r2_records)
#########################################################################################
# Prepare WTA, ABC, and SMK test data

# One entry per library type:
# (progress message, R1 file, R2 file, generator function, extra generator arguments)
test_libraries = [
    ("> Prepare WTA test data", "WTAreads_R1.fq.gz", "WTAreads_R2.fq.gz", generate_bd_wta_fastq_files, {}),
    ("> Prepare ABC test data", "ABCreads_R1.fq.gz", "ABCreads_R2.fq.gz", generate_bd_abc_fastq_files, {}),
    ("> Prepare SMK test data", "SMKreads_R1.fq.gz", "SMKreads_R2.fq.gz", generate_bd_smk_fastq_files, {"num_sample_tags": 3}),
]

for progress_message, r1_path, r2_path, make_fastq, extra_args in test_libraries:
    print(progress_message, flush=True)
    # every library uses 100 cells with 1000 reads each
    reads_r1_str, reads_r2_str = make_fastq(num_cells=100, num_reads_per_cell=1000, **extra_args)
    # write both mates as gzip-compressed text
    for fastq_path, fastq_content in ((r1_path, reads_r1_str), (r2_path, reads_r2_str)):
        with gzip.open(fastq_path, "wt") as f:
            f.write(fastq_content)
#########################################################################################
# Run executable
print(f">> Run {meta['name']}", flush=True)
output_dir = Path("output")
# check=True makes a non-zero exit status of the component raise
# subprocess.CalledProcessError right here, instead of the failure only showing
# up later as a confusing "output file does not exist" assertion.
subprocess.run(
    [
        meta['executable'],
        # WTA library + reference
        "--reads=WTAreads_R1.fq.gz;WTAreads_R2.fq.gz",
        f"--reference_archive={reference_file}",
        # ABC (AbSeq) library + reference
        "--reads=ABCreads_R1.fq.gz;ABCreads_R2.fq.gz",
        f"--abseq_reference={bdabseq_panel_fa}",
        # SMK (sample tag) library + sample names
        "--reads=SMKreads_R1.fq.gz;SMKreads_R2.fq.gz",
        "--tag_names=1-Sample1;2-Sample2;3-Sample3",
        "--sample_tags_version=human",
        f"--output_dir={output_dir}",
        "--exact_cell_count=100",
        # NOTE(review): the three-dash options are presumably the runner's
        # built-in resource flags rather than component arguments -- confirm.
        f"---cpus={meta['cpus'] or 1}",
        f"---memory={meta['memory_mb'] or 2048}mb",
        # "--output_seurat=seurat.rds",
        "--output_mudata=mudata.h5mu",
        "--metrics_summary=metrics_summary.csv",
        "--pipeline_report=pipeline_report.html",
    ],
    check=True,
)
# Check if output exists
print(">> Check if output exists", flush=True)
# seurat object is not generated when abc data is added, so neither
# output_dir / "sample_Seurat.rds" nor "seurat.rds" is checked
expected_files = [
    # files inside the output directory
    output_dir / "sample_Bioproduct_Stats.csv",
    output_dir / "sample_Metrics_Summary.csv",
    output_dir / "sample_Pipeline_Report.html",
    output_dir / "sample_RSEC_MolsPerCell_MEX.zip",
    output_dir / "sample_RSEC_MolsPerCell_Unfiltered_MEX.zip",
    output_dir / "sample.h5mu",
    # individual outputs
    Path("mudata.h5mu"),
    Path("metrics_summary.csv"),
    Path("pipeline_report.html"),
]
for expected_file in expected_files:
    # name the missing file so a failure is immediately actionable
    assert expected_file.exists(), f"Expected output file is missing: {expected_file}"

print(">> Check contents of output", flush=True)
data = md.read_h5mu("mudata.h5mu")
assert data.n_obs == 100, "Number of cells is incorrect"
assert "rna" in data.mod, "RNA data is missing"
assert "prot" in data.mod, "Protein data is missing"

# check rna data
data_rna = data.mod["rna"]
assert data_rna.n_vars == 1, "Number of genes is incorrect"
assert data_rna.X.sum(axis=1).min() > 950, "Number of reads per cell is incorrect"
# a tolerance of +/- 10 reads is used; the exact check (== 100000) was disabled in the original test
rna_raw_reads = data_rna.var.Raw_Reads.sum()
assert 99990 <= rna_raw_reads <= 100010, \
    f"Expected 100000 RNA reads, got {rna_raw_reads}"

# check prot data
data_prot = data.mod["prot"]
assert data_prot.n_vars == len(bdabseq_panel_fasta_dict), "Number of proteins is incorrect"
assert data_prot.X.sum(axis=1).min() > 950, "Number of reads per cell is incorrect"
prot_raw_reads = data_prot.var.Raw_Reads.sum()
assert 99990 <= prot_raw_reads <= 100010, \
    f"Expected 100000 Prot reads, got {prot_raw_reads}"

# check smk data: the expected lists repeat tags 1, 2, 3 cyclically over the 100 cells
expected_sample_tags = (["SampleTag01_hs", "SampleTag02_hs", "SampleTag03_hs"] * 34)[:100]
expected_sample_names = (["Sample1", "Sample2", "Sample3"] * 34)[:100]
sample_tags = data_rna.obs["Sample_Tag"]
assert sample_tags.nunique() == 3, "Number of sample tags is incorrect"
assert sample_tags.tolist() == expected_sample_tags, "Sample tags are incorrect"
sample_names = data_rna.obs["Sample_Name"]
assert sample_names.nunique() == 3, "Number of sample names is incorrect"
assert sample_names.tolist() == expected_sample_names, "Sample names are incorrect"

# TODO: add VDJ, ATAC, and targeted RNA to test

#########################################################################################
print("> Test successful", flush=True)

View File

@@ -0,0 +1,405 @@
#!/usr/bin/env python
# copied from https://bd-rhapsody-public.s3.amazonaws.com/CellLabel/rhapsody_cell_label.py.txt
# documented at https://bd-rhapsody-bioinfo-docs.genomics.bd.com/steps/steps_cell_label.html
"""
Rhapsody cell label structure
Information on the cell label is captured by the combination of bases in three cell label sections (CLS1, CLS2, CLS3).
Two common linker sequences (L1, L2) separate the three CLS.
--CLS1---|-L1-|--CLS2---|-L2-|--CLS3---|--UMI---|-CaptureSequence-
Each cell label section has a whitelist of 96 or 384 possible 9 base sequences.
All the capture oligos from a single bead will have the same cell label.
----------------
V1 beads:
[A96_cell_key1] + [v1_linker1] + [A96_cell_key2] + [v1_linker2] + [A96_cell_key3] + [8 random base UMI] + [18 base polyT capture]
----------------
Enhanced beads:
Enhanced beads contain two different capture oligo types, polyT and 5prime. On any one bead, the two different capture oligo types have the same cell label sequences.
Compared to the V1 bead, enhanced beads have shorter linker sequences, longer polyT, and 0-3 diversity insert bases at the beginning of the sequence.
The cell label sections use the same 3 sequence whitelists as V1 beads.
polyT capture oligo:
[Enh_insert 0-3 bases] + [A96_cell_key1] + [Enh_linker1] + [A96_cell_key2] + [Enh_linker2] + [A96_cell_key3] + [8 random base UMI] + [25 base polyT capture]
5prime capture oligo:
[Enh_5p_primer] + [A96_cell_key1] + [Enh_5p_linker1] + [A96_cell_key2] + [Enh_5p_linker2] + [A96_cell_key3] + [8 random base UMI] + [Tso_capture_seq]
----------------
Enhanced V2/V3 beads:
Enhanced V2/V3 beads have the same structure as Enhanced beads, but the cell label sections have been updated with increased diversity
polyT capture oligo:
[Enh_insert 0-3 bases] + [B384_cell_key1] + [Enh_linker1] + [B384_cell_key2] + [Enh_linker2] + [B384_cell_key3] + [8 random base UMI] + [25 base polyT capture]
5prime capture oligo:
[Enh_5p_primer] + [B384_cell_key1] + [Enh_5p_linker1] + [B384_cell_key2] + [Enh_5p_linker2] + [B384_cell_key3] + [8 random base UMI] + [Tso_capture_seq]
The only difference between Enh V2 and Enh V3 beads is a different Tso_capture_seq.
----------------
The Rhapsody Sequence Analysis Pipeline will convert each cell label into a single integer representing a unique cell label sequence - which is used in the output files as the 'Cell_index'.
This cell index integer is deterministic and derived from the 3 part cell label as follows:
- Get the 1-based index for each cell label section from the python sets of sequences below
- Apply this equation:
(CLS1index - 1) * 384 * 384 + (CLS2index - 1) * 384 + CLS3index
(See label_sections_to_index() function below)
Example: Enhanced bead sequence:
ACACATTGCAGTGAAGATAGTTCGACACTCAAGACA
Each part identified:
A CACATTGCA GTGA AGATAGTTC GACA CTCAAGACA
DiversityInsert A96_cell_key1-33 Linker1 A96_cell_key2-78 Linker2 A96_cell_key3-21
33-78-21
(33 - 1) * 384 * 384 + (78 - 1) * 384 + 21
=4748181
The original sequences of cell label can be determined from the cell index integer by reversing this conversion.
See index_to_label_sections() and index_to_sequence() functions below.
"""
# Linker sequences separating the three cell label sections on V1 beads
# (CLS1 + v1_linker1 + CLS2 + v1_linker2 + CLS3).
v1_linker1 = 'ACTGGCCTGCGA'
v1_linker2 = 'GGTAGCGGTGACA'
# Shorter linkers used by the polyT capture oligo of Enhanced (and Enhanced V2/V3) beads.
Enh_linker1 = 'GTGA'
Enh_linker2 = 'GACA'
# Primer and linkers used by the 5prime capture oligo of Enhanced (and Enhanced V2/V3) beads.
Enh_5p_primer = "ACAGGAAACTCATGGTGCGT"
Enh_5p_linker1 = "AATG"
Enh_5p_linker2 = "CCAC"
# The 0-3 base diversity inserts that can precede CLS1 on the polyT capture oligo.
Enh_inserts = ["", "A", "GT", "TCA"]
# Capture sequences that terminate the 5prime capture oligo; Enh V2 and Enh V3
# beads differ only in this sequence.
Tso_capture_seq_Enh_EnhV2 = "TATGCGTAGTAGGTATG"
Tso_capture_seq_EnhV3 = "GTGGAGTCGTGATTATA"
A96_cell_key1 = ("GTCGCTATA","CTTGTACTA","CTTCACATA","ACACGCCGG","CGGTCCAGG","AATCGAATG","CCTAGTATA","ATTGGCTAA","AAGACATGC","AAGGCGATC",
"GTGTCCTTA","GGATTAGGA","ATGGATCCA","ACATAAGCG","AACTGTATT","ACCTTGCGG","CAGGTGTAG","AGGAGATTA","GCGATTACA","ACCGGATAG",
"CCACTTGGA","AGAGAAGTT","TAAGTTCGA","ACGGATATT","TGGCTCAGA","GAATCTGTA","ACCAAGGAC","AGTATCTGT","CACACACTA","ATTAAGTGC",
"AAGTAACCC","AAATCCTGT","CACATTGCA","GCACTGTCA","ATACTTAGG","GCAATCCGA","ACGCAATCA","GAGTATTAG","GACGGATTA","CAGCTGACA",
"CAACATATT","AACTTCTCC","CTATGAAAT","ATTATTACC","TACCGAGCA","TCTCTTCAA","TAAGCGTTA","GCCTTACAA","AGCACACAG","ACAGTTCCG",
"AGTAAAGCC","CAGTTTCAC","CGTTACTAA","TTGTTCCAA","AGAAGCACT","CAGCAAGAT","CAAACCGCC","CTAACTCGC","AATATTGGG","AGAACTTCC",
"CAAAGGCAC","AAGCTCAAC","TCCAGTCGA","AGCCATCAC","AACGAGAAG","CTACAGAAC","AGAGCTATG","GAGGATGGA","TGTACCTTA","ACACACAAA",
"TCAGGAGGA","GAGGTGCTA","ACCCTGACC","ACAAGGATC","ATCCCGGAG","TATGTGGCA","GCTGCCAAT","ATCAGAGCT","TCGAAGTGA","ATAGACGAG",
"AGCCCAATC","CAGAATCGT","ATCTCCACA","ACGAAAGGT","TAGCTTGTA","ACACGAGAT","AACCGCCTC","ATTTAGATG","CAAGCAAGC","CAAAGTGTG",
"GGCAAGCAA","GAGCCAATA","ATGTAATGG","CCTGAGCAA","GAGTACATT","TGCGATCTA"
)
A96_cell_key2 = ("TACAGGATA","CACCAGGTA","TGTGAAGAA","GATTCATCA","CACCCAAAG","CACAAAGGC","GTGTGTCGA","CTAGGTCCT","ACAGTGGTA","TCGTTAGCA",
"AGCGACACC","AAGCTACTT","TGTTCTCCA","ACGCGAAGC","CAGAAATCG","ACCAAAATG","AGTGTTGTC","TAGGGATAC","AGGGCTGGT","TCATCCTAA",
"AATCCTGAA","ATCCTAGGA","ACGACCACC","TTCCATTGA","TAGTCTTGA","ACTGTTAGA","ATTCATCGT","ACTTCGAGC","TTGCGTACA","CAGTGCCCG",
"GACACTTAA","AGGAGGCGC","GCCTGTTCA","GTACATCTA","AATCAGTTT","ACGATGAAT","TGACAGACA","ATTAGGCAT","GGAGTCTAA","TAGAACACA",
"AAATAAATA","CCGACAAGA","CACCTACCC","AAGAGTAGA","TCATTGAGA","GACCTTAGA","CAAGACCTA","GGAATGATA","AAACGTACC","ACTATCCTC",
"CCGTATCTA","ACACATGTC","TTGGTATGA","GTGCAGTAA","AGGATTCAA","AGAATGGAG","CTCTCTCAA","GCTAACTCA","ATCAACCGA","ATGAGTTAC",
"ACTTGATGA","ACTTTAACT","TTGGAGGTA","GCCAATGTA","ATCCAACCG","GATGAACTG","CCATGCACA","TAGTGACTA","AAACTGCGC","ATTACCAAG",
"CACTCGAGA","AACTCATTG","CTTGCTTCA","ACCTGAGTC","AGGTTCGCT","AAGGACTAT","CGTTCGGTA","AGATAGTTC","CAATTGATC","GCATGGCTA",
"ACCAGGTGT","AGCTGCCGT","TATAGCCCT","AGAGGACCA","ACAATATGG","CAGCACTTC","CACTTATGT","AGTGAAAGG","AACCCTCGG","AGGCAGCTA",
"AACCAAAGT","GAGTGCGAA","CGCTAAGCA","AATTATAAC","TACTAGTCA","CAACAACGG"
)
A96_cell_key3 = ("AAGCCTTCT","ATCATTCTG","CACAAGTAT","ACACCTTAG","GAACGACAA","AGTCTGTAC","AAATTACAG","GGCTACAGA","AATGTATCG","CAAGTAGAA",
"GATCTCTTA","AACAACGCG","GGTGAGTTA","CAGGGAGGG","TCCGTCTTA","TGCATAGTA","ACTTACGAT","TGTATGCGA","GCTCCTTGA","GGCACAACA",
"CTCAAGACA","ACGCTGTTG","ATATTGTAA","AAGTTTACG","CAGCCTGGC","CTATTAGCC","CAAACGTGG","AAAGTCATT","GTCTTGGCA","GATCAGCGA",
"ACATTCGGC","AGTAATTAG","TGAAGCCAA","TCTACGACA","CATAACGTT","ATGGGACTC","GATAGAGGA","CTACATGCG","CAACGATCT","GTTAGCCTA",
"AGTTGCATC","AAGGGAACT","ACTACATAT","CTAAGCTTC","ACGAACCAG","TACTTCGGA","AACATCCAT","AGCCTGGTT","CAAGTTTCC","CAGGCATTT",
"ACGTGGGAG","TCTCACGGA","GCAACATTA","ATGGTCCGT","CTATCATGA","CAATACAAG","AAAGAGGCC","GTAGAAGCA","GCTATGGAA","ACTCCAGGG",
"ACAAGTGCA","GATGGTCCA","TCCTCAATA","AATAAACAA","CTGTACGGA","CTAGATAGA","AGCTATGTG","AAATGGAGG","AGCCGCAAG","ACAGTAAAC",
"AACGTGTGA","ACTGAATTC","AAGGGTCAG","TGTCTATCA","TCAGATTCA","CACGATCCG","AACAGAAAC","CATGAATGA","CGTACTACG","TTCAGCTCA",
"AAGGCCGCA","GGTTGGACA","CGTCTAGGT","AATTCGGCG","CAACCTCCA","CAATAGGGT","ACAGGCTCC","ACAACTAGT","AGTTGTTCT","AATTACCGG",
"ACAAACTTT","TCTCGGTTA","ACTAGACCG","ACTCATACG","ATCGAGTCT","CATAGGTCA"
)
B384_cell_key1 = ("TGTGTTCGC","TGTGGCGCC","TGTCTAGCG","TGGTTGTCC","TGGTTCCTC","TGGTGTGCT","TGGCGACCG","TGCTGTGGC","TGCTGGCAC","TGCTCTTCC",
"TGCCTCACC","TGCCATTAT","TGATGTCTC","TGATGGCCT","TGATGCTTG","TGAAGGACC","TCTGTCTCC","TCTGATTAT","TCTGAGGTT","TCTCGTTCT",
"TCTCATCCG","TCCTGGATT","TCAGCATTC","TCACGCCTT","TATGTGCAC","TATGCGGCC","TATGACGAG","TATCTCGTG","TATATGACC","TAGGCTGTG",
"TACTGCGTT","TACGTGTCC","TAATCACAT","GTTGTGTTG","GTTGTGGCT","GTTGTCTGT","GTTGTCGAG","GTTGTCCTC","GTTGTATCC","GTTGGTTCT",
"GTTGGCGTT","GTTGGAGCG","GTTGCTGCC","GTTGCGCAT","GTTGCAGGT","GTTGCACTG","GTTGATGAT","GTTGATACG","GTTGAAGTC","GTTCTGTGC",
"GTTCTCTCG","GTTCTATAT","GTTCGTATG","GTTCGGCCT","GTTCGCGGC","GTTCGATTC","GTTCCGGTT","GTTCCGACG","GTTCACGCT","GTTATCACC",
"GTTAGTCCG","GTTAGGTGT","GTTAGAGAC","GTTAGACTT","GTTACCTCT","GTTAATTCC","GTTAAGCGC","GTGTTGCTT","GTGTTCGGT","GTGTTCCAG",
"GTGTTCATC","GTGTCACAC","GTGTCAAGT","GTGTACTGC","GTGGTTAGT","GTGGTACCG","GTGGCGATC","GTGCTTCTG","GTGCGTTCC","GTGCGGTAT",
"GTGCGCCTT","GTGCGAACT","GTGCAGCCG","GTGCAATTG","GTGCAAGGC","GTCTTGCGC","GTCTGGCCG","GTCTGAGGC","GTCTCAGAT","GTCTCAACC",
"GTCTATCGT","GTCGGTGTG","GTCGGAATC","GTCGCTCCG","GTCCTCGCC","GTCCTACCT","GTCCGCTTG","GTCCATTCT","GTCCAATAC","GTCATGTAT",
"GTCAGTGGT","GTCAGATAG","GTATTAACT","GTATCAGTC","GTATAGCCT","GTATACTTG","GTATAAGGT","GTAGCATCG","GTACCGTCC","GTACACCTC",
"GTAAGTGCC","GTAACAGAG","GGTTGTGTC","GGTTGGCTG","GGTTGACGC","GGTTCGTCG","GGTTCAGTT","GGTTATATT","GGTTAATAC","GGTGTACGT",
"GGTGCCGCT","GGTGCATGC","GGTCGTTGC","GGTCGAGGT","GGTAGGCAC","GGTAGCTTG","GGTACATAG","GGTAATCTG","GGCTTGGCC","GGCTTCACG",
"GGCTTATGT","GGCTTACTC","GGCTGTCTT","GGCTCTGTG","GGCTCCGGT","GGCTCACCT","GGCGTTGAG","GGCGTGTAC","GGCGTGCTG","GGCGTATCG",
"GGCGCTCGT","GGCGCTACC","GGCGAGCCT","GGCGAGATC","GGCGACTTG","GGCCTCTTC","GGCCTACAG","GGCCAGCGC","GGCCAACTT","GGCATTCCT",
"GGCATCCGC","GGCATAACC","GGCAACGAT","GGATGTCCG","GGATGAGAG","GGATCTGGC","GGATCCATG","GGATAGGTT","GGAGTCGTG","GGAGAAGGC",
"GGACTCCTT","GGACTAGTC","GGACCGTTG","GGAATTAGT","GGAATCTCT","GGAATCGAC","GGAAGCCTC","GCTTGTAGC","GCTTGACCG","GCTTCGGAC",
"GCTTCACAT","GCTTAGTCT","GCTGGATAT","GCTGGAACC","GCTGCGATG","GCTGATCAG","GCTGAGCGT","GCTCTTGTC","GCTCTCCTG","GCTCGGTCC",
"GCTCCAATT","GCTATTCGC","GCTATGAGT","GCTAGTGTT","GCTAGGATC","GCTAGCACT","GCTACGTAT","GCTAACCTT","GCGTTCCGC","GCGTGTGCC",
"GCGTGCATT","GCGTCGGTT","GCGTATGTG","GCGTATACT","GCGGTTCAC","GCGGTCTTG","GCGGCGTCG","GCGGCACCT","GCGCTGGAC","GCGCTCTCC",
"GCGCGGCAG","GCGCGATAC","GCGCCGACC","GCGAGCGAG","GCGAGAGGT","GCGAATTAC","GCCTTGCAT","GCCTGCGCT","GCCTAACTG","GCCGTCCGT",
"GCCGCTGTC","GCCATGCCG","GCCAGCTAT","GCCAACCAG","GCATGGTTG","GCATCGACG","GCAGGCTAG","GCAGGACGC","GCAGCCATC","GCAGATACC",
"GCAGACGTT","GCACTATGT","GCACACGAG","GATTGTCAT","GATTGGTAG","GATTGCACC","GATTCTACT","GATTCGCTT","GATTAGGCC","GATTACGGT",
"GATGTTGGC","GATGTTATG","GATGGCCAG","GATCGTTCG","GATCGGAGC","GATCGCCTC","GATCCTCTG","GATCCAGCG","GATACACGC","GAGTTACCT",
"GAGTCGTAT","GAGTCGCCG","GAGGTGTAG","GAGGCATTG","GAGCGGACG","GAGCCTGAG","GAGATCTGT","GAGATAATT","GAGACGGCT","GACTTCGTG",
"GACTGTTCT","GACTCTTAG","GACCGCATT","GAATTGAGC","GAATATTGC","GAAGGCTCT","GAAGAGACT","GAACTGCCG","GAACGCGTG","CTTGTGTAT",
"CTTGTGCGC","CTTGTCATG","CTTGGTCTT","CTTGGTACC","CTTGGATGT","CTTGCTCAC","CTTGCAATC","CTTGAGGCC","CTTGACGGT","CTTCTGATC",
"CTTCTCGTT","CTTCTAGGC","CTTCGTTAG","CTTATGTCC","CTTATGCTT","CTTATATAG","CTTAGGTTG","CTTAGGAGC","CTTACTTAT","CTGTTCTCG",
"CTGTGCCTC","CTGTCGCAT","CTGTCGAGC","CTGTAGCTG","CTGTACGTT","CTGCTTGCC","CTGCGTAGT","CTGCACACC","CTGATGGAT","CTGAGTCAT",
"CTGACGCCG","CTGAACGAG","CTCTTGTAG","CTCTTAGTT","CTCTTACCG","CTCTGCACC","CTCTCGTCC","CTCGTATTG","CTCGACTAT","CTCCTGACG",
"CTCACTAGC","CTATACGGC","CGTTCGCTC","CGTTCACCG","CGTATAGTT","CGGTGTTCC","CGGTGTCAG","CGGTCCTGC","CGGCGACTC","CGGCACGGT",
"CGGATAGCC","CGGAGAGAT","CGCTAATAG","CGCGTTGGC","CGCGCAGAG","CGCACTGCC","CCTTGTCTC","CCTTGGCGT","CCTTCTGAG","CCTTCTCCT",
"CCTTCGACC","CCTTACTTG","CCTGTTCGT","CCTGTATGC","CCTCGGCCG","CCGTTAATT","CCATGTGCG","CCAGTGGTT","CCAGGCATT","CCAGGATCC",
"CCAGCGTTG","CATTCCGAT","CATTATACC","CATGTTGAG","ATTGCGTGT","ATTGCGGAC","ATTGCGCCG","ATTGACTTG","ATTCGGCTG","ATTCGCGAG",
"ATTCCAAGT","ATTATCTTC","ATTACTGTT","ATTACACTC","ATGTTCTAT","ATGTTACGC","ATGTGTATC","ATGTGGCAG","ATGTCTGTG","ATGGTGCAT",
"ATGCTTACT","ATGCTGTCC","ATGCTCGGC","ATGAGGTTC","ATGAGAGTG","ATCTTGGCT","ATCTGTGCG","ATCGGTTCC","ATCATGCTC","ATCATCACT",
"ATATCTTAT","ATAGGCGCC","AGTTGGTAT","AGTTGAGCC","AGTGCGACC","AGGTGCTAC","AGGCTTGCG","AGGCCTTCC","AGGCACCTT","AGGAATATG",
"AGCGGCCAG","AGCCTGGTC","AGCCTGACT","AGCAATCCG","AGAGATGTT","AGAGAATTC","ACTCGCTTG","ACTCGACCT","ACGTACACC","ACGGATGGT",
"ACCAGTCTG","ACATTCGGC","ACATGAGGT","ACACTAATT"
)
B384_cell_key2 = ("TTGTGTTGT","TTGTGGTAG","TTGTGCGGA","TTGTCTGTT","TTGTCTAAG","TTGTCATAT","TTGTCACGA","TTGTATGAA","TTGTACAGT","TTGGTTAAT",
"TTGGTGCAA","TTGGTCGAG","TTGGTATTA","TTGGCACAG","TTGGATACA","TTGGAAGTG","TTGCGGTTA","TTGCCATTG","TTGCACGCG","TTGCAAGGT",
"TTGATGTAT","TTGATAATT","TTGAGACGT","TTGACTACT","TTGACCGAA","TTCTGGTCT","TTCTGCACA","TTCTCCTTA","TTCTCCGCT","TTCTAGGTA",
"TTCTAATCG","TTCGTCGTA","TTCGTAGAT","TTCGGCTTG","TTCGGAATA","TTCGCCAGA","TTCGATTGT","TTCGATCAG","TTCCTCGGT","TTCCGGCAG",
"TTCCGCATT","TTCCAATTA","TTCATTGAA","TTCATGCTG","TTCAGGAGT","TTCACTATA","TTCAACTCT","TTCAACGTG","TTATGCGTT","TTATGATTG",
"TTATCCTGT","TTATCCGAG","TTATATTAT","TTAGGCGCG","TTACTGGAA","TTACTAGTT","TTACGTGGT","TTACGATAT","TTACCTAGA","TTACATGAG",
"TTACAGCGT","TTACACGGA","TTACACACT","TTAATCAGT","TTAATAGGA","TTAAGTGTG","TTAACCTTG","TTAACACAA","TGTTCACTT","TGTTCAAGA",
"TGTTAAGTG","TGTGTTATG","TGTGTCCAA","TGTGGAGCG","TGTCAGTTA","TGTCAGAAG","TGGTTAGTT","TGGTTACAA","TGGCGTTAT","TGGCGCCAA",
"TGGAGTCTT","TGCGTATTG","TGATAGAGA","TGAGGTATT","TGAGAATCT","TCTTGGTAA","TCTTCATAG","TCTGTCCTT","TCTGGAATT","TCTACCGCG",
"TCGTTCGAA","TCGTCAGTG","TCGACGAGA","TCATGGCTT","TCACACTTA","TATTCCGAA","TATTATGGT","TATGCTATT","TATCAAGGA","TAGTTCAAT",
"TAGCTGCTT","TAGAGGAAG","TACCTGTTA","TACACCTGT","GTTGTGCGT","GTTGGCTAT","GTTGCCAAG","GTTGACCTT","GTTCTGCTA","GTTCTGAAT",
"GTTCTATCA","GTTCGCGTG","GTTCCTTAT","GTTAGCAGT","GTTACTGTG","GTTACTCAA","GTTAAGAGA","GTTAACTTA","GTGTCGGCA","GTGTCCATT",
"GTGCTTGAG","GTGCTCGTT","GTGCTCACA","GTGCCTGGA","GTCTTGTCG","GTCTTGATT","GTCTTCCGT","GTCTTAAGA","GTCTCATCT","GTCTACGAG",
"GTCGTTGCT","GTCGTGTTA","GTCGGTAAT","GTCGGATGT","GTCGAGCTG","GTCCGGACT","GTCCAACAT","GTCAGACGA","GTCAGAATT","GTCACTCTT",
"GTCAAGGAA","GTATGTCTT","GTATGTACA","GTATCGGTT","GTATATGTA","GTATACAAT","GTAGTTAAG","GTAGTCGAT","GTAGCCTTA","GTAGATACT",
"GTACGATTA","GTACAGTCT","GTAATTCGT","GCTTGGCAG","GCTTGCTTG","GCTTGAGGA","GCTTCATTA","GCTTATGCG","GCTGTGTAG","GCTGTCATG",
"GCTGGTTGT","GCTGGACTG","GCTGCCTAA","GCTGATATT","GCTCTTAGT","GCTCTATTG","GCTCGCCGT","GCTCCGCTG","GCTATTCTG","GCTATACGA",
"GCTACTAAG","GCTACATGT","GCTAACTCT","GCGTTGTAA","GCGTTCTCT","GCGTGCGTA","GCGTCTTGA","GCGTCCGAT","GCGTAAGAG","GCGCTTACG",
"GCGCGGATT","GCGCCATAT","GCGCATGAA","GCGATCAAT","GCGAGCCTT","GCGAGATTG","GCGAGAACA","GCCTTGGTA","GCCTTCTAG","GCCTTCACA",
"GCCTGAGTG","GCCTCACGT","GCCGGCGAA","GCCGCACAA","GCCATGCTT","GCCATATAT","GCCAATTCG","GCATTCGTT","GCATGATGT","GCAGTTGGA",
"GCAGTGTCT","GCACTTGTG","GCAATCTGT","GCAACACTT","GATTGTATT","GATTGCGAG","GATTCCAGT","GATTCATAT","GATTATCAG","GATTAGGTT",
"GATGTTGCG","GATGGATCT","GATGCTGAT","GATGCCTTG","GATCTCCTT","GATCGCTTA","GATATTGAA","GATATTACT","GAGTGTTAT","GAGCTCAGT",
"GAGCGTGCT","GAGCGTCGA","GAGCGGTTG","GAGCGACTT","GAGCCGAAT","GAGATAGAT","GAGACCTAT","GACGGTCGT","GACGCAGGT","GACGATATG",
"GACCTATCT","GAATTAGGA","GAATCAGCT","GAAGTTCAT","GAAGTGGTT","GAAGTATTG","GAAGGCATT","GAACGCTGT","CTTGTCCAG","CTTGGATTG",
"CTTGCTGAA","CTTGCCGTG","CTTGATTCT","CTTCTGTCG","CTTCGGCGT","CTTATGAGT","CTTACCGAT","CTGTTAGGT","CTGTCGTCT","CTGTATAAT",
"CTGGCTCAT","CTGGATGCG","CTGCGTGTG","CTGCGCGGT","CTGCCGATT","CTGCATTGT","CTGATTAAG","CTGAGATAT","CTGACCTGT","CTCGTATCT",
"CTCGGCAAG","CTCGCAATT","CTCCTGCTT","CTCCTAAGT","CTCCGGATG","CTCCGAGCG","CTCACAGGT","CTATTCTAT","CTATTAGTG","CTATGAATT",
"CTACATATT","CGTGGCATT","CGTCTTAAT","CGTCTGGTT","CGTCACTGT","CGTAGGTCT","CGGTTCGAG","CGGTTCATT","CGGTGCTCT","CGGTAATTG",
"CGGCCTGAT","CGGATATAG","CGGAATATT","CGCTCCAAT","CGCGTTCGT","CGCAGGTTG","CGAGGATGT","CGAGCTGTT","CGACGGCTT","CCTTGTGTG",
"CCTGTCTCA","CCTGACTAT","CCTACCTTG","CCGTAGATT","CCGGCTGGT","CATCGGACG","CATCGATAA","CATCCTTCT","CAGTTCTGT","CAGTGCCAG",
"CAGGCACTG","CAGCCTCTT","CACTTATAT","CACTGGTCG","CACTGCATG","CACGCGTTG","CACGATGTT","CACCATCTG","CACAGGCGT","ATTGTACAA",
"ATTGGTATG","ATTGCTAAT","ATTGCATAG","ATTGCAGTT","ATTCTGCAG","ATTCTACGT","ATTCGGATT","ATTCCGTTG","ATTCATCAA","ATTCAAGAG",
"ATTAGCCTT","ATTAATATT","ATGTTAGAG","ATGTTAACT","ATGTAGTCG","ATGGTGTAG","ATGGATTAT","ATCTTGAAG","ATCTGATAT","ATCTCAGAA",
"ATCGCTCAA","ATCGCGTCG","ATCCATGGT","ATCATGAGA","ATCATAGTT","ATCAGCGAG","ATCACCATT","ATAGTAATT","ATAGCTGTG","ATACTCTCG",
"ATACCTCAT","AGTTGCGCG","AGTTGAATT","AGTTATGAT","AGTGTCCGT","AGTGGCTTG","AGTGCTTCT","AGTATCATT","AGTACACAA","AGGTATGCG",
"AGGTATAGT","AGGCTACTT","AGGCCAGGT","AGGAGCGAT","AGCTTATAG","AGCTCTAGA","AGCGTGTAT","AGCGTCACA","AGCCTTCAT","AGCCTGTCG",
"AGCCTCGAG","AGCACTGAA","AGATGTACG","AGAGTTAAT","AGACCTCTG","ACTTCTATA","ACTGTCGAG","ACTGTATGT","ACTCTGTAA","ACTCGCGAA",
"ACTAGATCT","ACTAACGTT","ACGTTACTG","ACGTGGAAT","ACGGACTCT","ACGCCTAAT","ACGCCGTTA","ACGACGTGT","ACCTCGCAT","ACCATCATA",
"ACATATATT","ACAGGCACA","ACACCTGAG","ACACATTCT"
)
B384_cell_key3 = ("TTGTGGCTG","TTGTGGAGT","TTGTGCGAC","TTGTCTTCA","TTGTAAGAT","TTGGTTCTG","TTGGTGCGT","TTGGTCTAC","TTGGTAACT","TTGGCGTGC",
"TTGGATTAG","TTGGAGACG","TTGGAATCA","TTGCGGCGA","TTGCGCTCG","TTGCCTTAC","TTGCCGGAT","TTGCATGCT","TTGCACGTC","TTGCACCAT",
"TTGAACCTG","TTCTCGCGT","TTCTCAACT","TTCTACTCA","TTCGTCCAT","TTCGGATAC","TTCGGACGT","TTCGCAATC","TTCCGGTGC","TTCCGACTG",
"TTCATTATG","TTCATGGAT","TTCAGCGCA","TTCACCTCG","TTCAAGCAG","TTCAACTAC","TTATGCCAG","TTATGCATC","TTATCGTAC","TTATACCTA",
"TTATAATAG","TTATAAGTC","TTAGTTAGC","TTAGCTCAT","TTAGCACTA","TTAGATATG","TTACTACGA","TTACCGTCA","TTACAGAGC","TTAATTGCA",
"TTAACAGAT","TGTTGGCTA","TGTTGATGA","TGTTAAGCT","TGTGGCCGA","TGTGCTAGC","TGTGCGTCA","TGTCGCAGT","TGTCGAGCA","TGTACAACG",
"TGGTTCCGA","TGGTTCACT","TGGTCAAGT","TGGCTTGTA","TGGCTGTCG","TGGCGTATG","TGGCGCGCT","TGGATGTAC","TGGACTTGC","TGGAATACT",
"TGCTAGCGA","TGCGTTGCT","TGCGGTCTG","TGCGCTTAG","TGCGCGACG","TGCCTGCAT","TGCCTAGAC","TGCACGAGT","TGAGTGTGC","TGAGGCTCG",
"TCTTCCGTC","TCTTATAGT","TCTTACCAT","TCTGTTGTC","TCTGTTACT","TCTGGCTAG","TCTCAGATC","TCTAGTTGA","TCTAGTACG","TCGTACTAC",
"TCGGTGTAG","TCGGCTGCT","TCGCTACTG","TCGATCACG","TCGAGGCAT","TCCGGCGTC","TCCGGAGCT","TCCGCTCGT","TCCGAGTAC","TCCATTCAT",
"TCCATGGTC","TCCAAGTCG","TCATTACGT","TCATGCACT","TCAGGTTGC","TCAGACCGT","TCACTCAGT","TCAAGCTCA","TATTGCGCA","TATTCGGCT",
"TATTCCAGC","TATTCATCA","TATGTTCAG","TATGGTATG","TATGCAAGT","TATCTGGTC","TATCTGACT","TATCCAGAT","TATCAGTCG","TATCACGCT",
"TAGGCGCGA","TAGGCACAT","TAGGATCGT","TAGCATTGC","TAGAGTTAC","TAGACTGAT","TACTTGTCG","TACGTCCGA","TACCGTACT","TACCGCGAT",
"TACCAGGAC","TACAGAAGT","TAAGTGCAT","TAAGCTACT","GTTGACCGA","GTTCTCGAC","GTTCCTGCT","GTTATGATG","GTGCTTGCA","GTGCCGCGT",
"GTATTGCTG","GTATTCCGA","GTATTAAGC","GTATGACGT","GTAGTTGTC","GTAGTACAT","GTAGCTCGA","GGTTGCTCA","GGTTGAGTA","GGTTAACGT",
"GGTGTGGCA","GGTCTTCAG","GGTCGTCTA","GGTCGGCGT","GGTCCGACT","GGTCATGTC","GGTCACATG","GGTAGTGCT","GGTAGCGTC","GGTACCAGT",
"GGTAAGGAT","GGCTTGTGC","GGCTTGACT","GGCTTACGA","GGCTGTAGT","GGCTGGCAG","GGCTCCATC","GGCGTGGAT","GGCGTAATC","GGCGCAAGT",
"GGCGAGTAG","GGCGACCGT","GGCCTGTCA","GGCCATTGC","GGCACTCTG","GGATGTCAT","GGAGTAACT","GGAGAACGA","GGACTGGCT","GGACGTTCA",
"GGAACGTGC","GCTGTCCAT","GCTGGTTCA","GCTGCAACT","GCTCGTTAC","GCTATAGAT","GCTAGTCGT","GCTACCATG","GCGTTCTGA","GCGTGTTAG",
"GCGGTATCG","GCGGAGCAT","GCGCGGTGC","GCGCCTAGT","GCGCCGGCT","GCCTTCATG","GCCATACTG","GCATGTTGA","GCATGCTAC","GCAGTATAC",
"GCAGGTACT","GCAGCGCGT","GCACCTCAT","GCAATTCGA","GATTGCCGT","GATGAACAT","GATCTTCGA","GATCTGCAT","GAGTGGCAT","GAGTCGGAC",
"GAGTATGAT","GAGGCGAGT","GAGGCAACG","GAGCGCACT","GAATAGGCT","ATTGTCACT","ATTGTATCA","ATTGGTCAG","ATTGGCGAT","ATTGATCGT",
"ATTCGTAGT","ATTCATACG","ATTCAGGAC","ATTACTTCA","ATTAATTAG","ATTAAGCAT","ATGTCTCTA","ATGTAGCGT","ATGGCATAC","ATGGAGATC",
"ATGGACTCG","ATGGAACGA","ATGCTTCAT","ATGCTCGCT","ATGCGACGT","ATGCCGTAG","ATGAGTTCG","ATGACTATC","ATGACCGAC","ATCTTATGC",
"ATCTTACTA","ATCTATCAG","ATCGTGTAC","ATCGTCTGA","ATCGGCATG","ATCGCGAGC","ATCGCAACG","ATCGATGCT","ATCGAATAG","ATCCTTCTG",
"ATCCTGCGT","ATCCGCACT","ATCCATTAC","ATCCAAGCA","ATCAGATCA","ATCACACAT","ATCAACGTC","ATCAACCGA","ATATTGAGT","ATATTCGTC",
"ATATTACAG","ATATCTTGA","ATATCGCAT","ATATCAATC","ATAGTCCTG","ATAGGTCTA","ATAGCTGAC","ATAGCGGTA","AGTTCGCTG","AGTTACAGC",
"AGTTAACTA","AGTGCAATC","AGTCTGGTA","AGTCTGAGC","AGTCTACAT","AGTCGAACT","AGTCCATCG","AGTCATTCA","AGTATCCAG","AGTAGACTG",
"AGTAATCGA","AGTAAGTGC","AGGTTGGCT","AGGTTCTAG","AGGTGTTCA","AGGTGCCAT","AGGTCTGAT","AGGTCGTAC","AGGTCAGCA","AGGCTTATC",
"AGGCTATGA","AGGCCGACG","AGGCCAAGC","AGGCAGGTC","AGGCAAGAT","AGGAGCAGT","AGGACCGCT","AGGAATTAC","AGCTTGGAC","AGCTTAAGT",
"AGCTACACG","AGCGTTACG","AGCGGTGCA","AGCGGAGTC","AGCGGACGA","AGCGCGCTA","AGCGATAGC","AGCGACTCA","AGCCTCTAC","AGCCGTCGT",
"AGCATGATC","AGCACTTCG","AGCACGGCA","AGATTCTGA","AGATTAGAT","AGATGATAG","AGATATGTA","AGATACCGT","AGAGTGCGT","AGAGCCGAT",
"AGACTCACT","ACTTGCCTA","ACTTGAGCA","ACTTCTAGC","ACTTCGACT","ACTTAGTAC","ACTGTTGAT","ACTGTAACG","ACTGGTATC","ACTGACGTC",
"ACTGAAGCT","ACTCTGATG","ACTCCTGAC","ACTCCGCTA","ACTCAACTG","ACTATTGCA","ACTAGGCAG","ACTACGCGT","ACTAATACT","ACGTTCGTA",
"ACGTGTGCT","ACGTGTATG","ACGTGGAGC","ACGTCTTCG","ACGTCAGTC","ACGGTCTCA","ACGGTCCGT","ACGGTACAG","ACGGCGCTG","ACGCTGCGA",
"ACGCGTGTA","ACGCGCCAG","ACGATGTCG","ACGATGGAT","ACGATCTAC","ACGAGCTGA","ACGAGCATC","ACGAATCGT","ACGAACGCA","ACCTTGTAG",
"ACCTGTTGC","ACCTGTCAT","ACCTCGATC","ACCTAGGTA","ACCTACTGA","ACCTAATCG","ACCGTAGCA","ACCGGTAGT","ACCGGCTAC","ACCGCTTCA",
"ACATTGTGC","ACATTCTCG","ACATGGCTG","ACATGACGA","ACATATGAT","ACATATACG","ACAGCGTAC","ACACTTGCT","ACACTATCA","ACACGCATG",
"ACACCAGTA","ACACCAACT","ACACATAGT","ACACACCTA"
)
def label_sections_to_index(label):
    """
    Convert a 3 part cell label string such as '33-78-21' into its cell_index integer.

    Each part is the 1-based index of a cell label section; the result is
    (CLS1 - 1) * 384 * 384 + (CLS2 - 1) * 384 + CLS3.
    """
    first, second, third = tuple(int(part) for part in label.split('-'))
    # CLS1 and CLS2 act as zero-based "digits" in base 384; CLS3 stays 1-based
    high = (first - 1) * 384 * 384
    middle = (second - 1) * 384
    return high + middle + third
# print(label_sections_to_index('1-1-1'))
# print(label_sections_to_index('33-78-21'))
# print(label_sections_to_index('43-12-77'))
# print(label_sections_to_index('96-96-96'))
# print(label_sections_to_index('135-43-344'))
# print(label_sections_to_index('384-384-384'))
# print('-')
#----------------------------------
def index_to_label_sections(index):
    """
    Convert a cell_index integer back into its 3 part cell label string.

    This is the inverse of label_sections_to_index(): the returned string has
    the form 'CLS1-CLS2-CLS3', each part being a 1-based section index.
    """
    zerobased = int(index) - 1
    # Pure integer arithmetic (divmod) instead of float division followed by
    # int(): exact for integers of any size.
    remaining, cl3 = divmod(zerobased, 384)
    remaining, cl2 = divmod(remaining, 384)
    # wrap the first section into 0..383 as well, like the other two
    cl1 = remaining % 384
    return f'{cl1 + 1}-{cl2 + 1}-{cl3 + 1}'
# print(index_to_label_sections(1))
# print(index_to_label_sections(4748181))
# print(index_to_label_sections(6197453))
# print(index_to_label_sections(14044896))
# print(index_to_label_sections(19775576))
# print(index_to_label_sections(56623104))
# print('-')
#----------------------------------
def index_to_sequence(index, bead_version):
    """
    Build the cell label nucleotide sequence for a 1-based cell index.

    The index is decomposed into three 1-based cell label sections (base-384
    digits). Each section selects one key sequence, and the keys are joined
    with the linker sequences of the requested bead version. For 'Enh' and
    'EnhV2' a diversity insert of zero to three bases, chosen from the first
    section, is prepended.

    Args:
        index: Cell index (an int, or anything int() accepts); must be >= 1.
        bead_version: One of 'v1', 'Enh' or 'EnhV2'.

    Returns:
        The concatenated sequence string.

    Raises:
        ValueError: If index is smaller than 1 or bead_version is not one of
            the supported values.
        IndexError: If a section is larger than the key list of the requested
            bead version (presumably sections above 96 for 'v1' and 'Enh';
            the key lists are defined elsewhere in this file).
    """
    zerobased = int(index) - 1
    if zerobased < 0:
        raise ValueError(f'cell index must be >= 1, got {index!r}')
    # Exact integer decoding; float division would lose precision on large values.
    rest, cl3 = divmod(zerobased, 384)
    cl1, cl2 = divmod(rest, 384)
    cl1 = cl1 % 384 + 1
    cl2 += 1
    cl3 += 1

    # Diversity insert per group of 24 first-section keys: 1-24, 25-48, 49-72, 73+.
    diversity_inserts = ('', 'A', 'GT', 'TCA')

    if bead_version == 'v1':
        cls1_sequence = A96_cell_key1[cl1-1]
        cls2_sequence = A96_cell_key2[cl2-1]
        cls3_sequence = A96_cell_key3[cl3-1]
        return f'{cls1_sequence}{v1_linker1}{cls2_sequence}{v1_linker2}{cls3_sequence}'

    if bead_version == 'Enh':
        # Everything from section 73 upwards falls in the last group.
        diversity_insert = diversity_inserts[min((cl1 - 1) // 24, 3)]
        cls1_sequence = A96_cell_key1[cl1-1]
        cls2_sequence = A96_cell_key2[cl2-1]
        cls3_sequence = A96_cell_key3[cl3-1]
        return f'{diversity_insert}{cls1_sequence}{Enh_linker1}{cls2_sequence}{Enh_linker2}{cls3_sequence}'

    if bead_version == 'EnhV2':
        # The insert pattern repeats for every run of 96 first-section keys.
        diversity_insert = diversity_inserts[((cl1 - 1) % 96) // 24]
        cls1_sequence = B384_cell_key1[cl1-1]
        cls2_sequence = B384_cell_key2[cl2-1]
        cls3_sequence = B384_cell_key3[cl3-1]
        return f'{diversity_insert}{cls1_sequence}{Enh_linker1}{cls2_sequence}{Enh_linker2}{cls3_sequence}'

    # Previously an unknown version fell through and returned None silently.
    raise ValueError(
        f"unknown bead_version {bead_version!r}; expected 'v1', 'Enh' or 'EnhV2'"
    )
# print(index_to_sequence(4748181, 'Enh'))
# print(index_to_sequence(52923177, 'EnhV2'))
#----------------------------------
def create_cell_index_fasta_V1():
    """
    Write every 'v1' cell index and its sequence to a FASTA file.

    Walks all 96 x 96 x 96 cell label combinations and writes one record per
    combination (a '>index' header line followed by the sequence line) to
    'Rhapsody_cellBarcodeV1_IndexToSequence.fasta' in the working directory.
    """
    sections = range(1, 96+1)
    with open('Rhapsody_cellBarcodeV1_IndexToSequence.fasta', 'w') as fasta:
        for first in sections:
            for second in sections:
                for third in sections:
                    cell_index = label_sections_to_index(f'{first}-{second}-{third}')
                    barcode = index_to_sequence(cell_index, 'v1')
                    # Header and sequence line of one FASTA record.
                    fasta.write(f'>{cell_index}\n{barcode}\n')
#create_cell_index_fasta_V1()
def create_cell_index_fasta_Enh():
    """
    Write every 'Enh' cell index and its sequence to a FASTA file.

    Walks all 96 x 96 x 96 cell label combinations and writes one record per
    combination (a '>index' header line followed by the sequence line) to
    'Rhapsody_cellBarcodeEnh_IndexToSequence.fasta' in the working directory.
    """
    sections = range(1, 96+1)
    with open('Rhapsody_cellBarcodeEnh_IndexToSequence.fasta', 'w') as fasta:
        for first in sections:
            for second in sections:
                for third in sections:
                    cell_index = label_sections_to_index(f'{first}-{second}-{third}')
                    barcode = index_to_sequence(cell_index, 'Enh')
                    # Header and sequence line of one FASTA record.
                    fasta.write(f'>{cell_index}\n{barcode}\n')
#create_cell_index_fasta_Enh()
def create_cell_index_fasta_EnhV2():
    """
    Write every 'EnhV2' cell index and its sequence to a FASTA file.

    Walks all 384 x 384 x 384 cell label combinations and writes one record
    per combination (a '>index' header line followed by the sequence line) to
    'Rhapsody_cellBarcodeEnhV2_IndexToSequence.fasta' in the working directory.
    """
    sections = range(1, 384+1)
    with open('Rhapsody_cellBarcodeEnhV2_IndexToSequence.fasta', 'w') as fasta:
        for first in sections:
            for second in sections:
                for third in sections:
                    cell_index = label_sections_to_index(f'{first}-{second}-{third}')
                    barcode = index_to_sequence(cell_index, 'EnhV2')
                    # Header and sequence line of one FASTA record.
                    fasta.write(f'>{cell_index}\n{barcode}\n')
#create_cell_index_fasta_EnhV2()

View File

@@ -0,0 +1,60 @@
>CD11c:B-LY6|ITGAX|AHS0056|pAbO Catalog_940024
ATGCGTTGCGAGAGATATGCGTAGGTTGCTGATTGG
>CD14:MPHIP9|CD14|AHS0037|pAbO Catalog_940005
TGGCCCGTGGTAGCGCAATGTGAGATCGTAATAAGT
>CXCR5|CXCR5|AHS0039|pAbO Catalog_940042
AGGAAGGTCGATTGTATAACGCGGCATTGTAACGGC
>CD19:SJ25C1|CD19|AHS0030|pAbO Catalog_940004
TAGTAATGTGTTCGTAGCCGGTAATAATCTTCGTGG
>CD25:2A3|IL2RA|AHS0026|pAbO Catalog_940009
AGTTGTATGGGTTAGCCGAGAGTAGTGCGTATGATT
>CD27:M-T271|CD27|AHS0025|pAbO Catalog_940018
TGTCCGGTTTAGCGAATTGGGTTGAGTCACGTAGGT
>CD278|ICOS|AHS0012|pAbO Catalog_940043
ATAGTCCGCCGTAATCGTTGTGTCGCTGAAAGGGTT
>CD279:EH12-1|PDCD1|AHS0014|pAbO Catalog_940015
ATGGTAGTATCACGACGTAGTAGGGTAATTGGCAGT
>CD3:UCHT1|CD3E|AHS0231|pAbO Catalog_940307
AGCTAGGTGTTATCGGCAAGTTGTACGGTGAAGTCG
>GITR|TNFRSF18|AHS0104|pAbO Catalog_940096
TCTGTGTGTCGGGTTGAATCGTAGTGAGTTAGCGTG
>Tim3|HAVCR2|AHS0016|pAbO Catalog_940066
TAGGTAGTAGTCCCGTATATCCGATCCGTGTTGTTT
>CD4:SK3|CD4|AHS0032|pAbO Catalog_940001
TCGGTGTTATGAGTAGGTCGTCGTGCGGTTTGATGT
>CD45RA:HI100|PTPRC|AHS0009|pAbO Catalog_940011
AAGCGATTGCGAAGGGTTAGTCAGTACGTTATGTTG
>CD56:NCAM16.2|NCAM1|AHS0019|pAbO Catalog_940007
AGAGGTTGAGTCGTAATAATAATCGGAAGGCGTTGG
>CD62L:DREG-56|SELL|AHS0049|pAbO Catalog_940041
ATGGTAAATATGGGCGAATGCGGGTTGTGCTAAAGT
>CCR7|CCR7|AHS0273|pAbO Catalog_940394
AATGTGTGATCGGCAAAGGGTTCTCGGGTTAATATG
>CXCR6|CXCR6|AHS0148|pAbO Catalog_940234
GTGGTTGGTTATTCGGACGGTTCTATTGTGAGCGCT
>CD127|IL7R|AHS0028|pAbO Catalog_940012
AGTTATTAGGCTCGTAGGTATGTTTAGGTTATCGCG
>CD134:ACT35|TNFRSF4|AHS0013|pAbO Catalog_940060
GGTGTTGGTAAGACGGACGGAGTAGATATTCGAGGT
>CD28:L293|CD28|AHS0138|pAbO Catalog_940226
TTGTTGAGGATACGATGAAGCGGTTTAAGGGTGTGG
>CD272|BTLA|AHS0052|pAbO Catalog_940105
GTAGGTTGATAGTCGGCGATAGTGCGGTTGAAAGCT
>CD8:SK1|CD8A|AHS0228|pAbO Catalog_940305
AGGACATAGAGTAGGACGAGGTAGGCTTAAATTGCT
>HLA-DR|CD74|AHS0035|pAbO Catalog_940010
TGTTGGTTATTCGTTAGTGCATCCGTTTGGGCGTGG
>CD16:3G8|FCGR3A|AHS0053|pAbO Catalog_940006
TAAATCTAATCGCGGTAACATAACGGTGGGTAAGGT
>CD183|CXCR3|AHS0031|pAbO Catalog_940030
AAAGTGTTGGCGTTATGTGTTCGTTAGCGGTGTGGG
>CD196|CCR6|AHS0034|pAbO Catalog_940033
ACGTGTTATGGTGTTGTTCGAATTGTGGTAGTCAGT
>CD137|TNFRSF9|AHS0003|pAbO Catalog_940055
TGACAAGCAACGAGCGATACGAAAGGCGAAATTAGT
>CD161:HP-3G10|KLRB1|AHS0205|pAbO Catalog_940283
TTTAGGACGATTAGTTGTGCGGCATAGGAGGTGTTC
>IgM|IGHM|AHS0198|pAbO Catalog_940276
TTTGGAGGGTAGCTAGTTGCAGTTCGTGGTCGTTTC
>IgD|IGHD|AHS0058|pAbO Catalog_940026
TGAGGGATGTATAGCGAGAATTGCGACCGTAGACTT

View File

@@ -0,0 +1,24 @@
>SampleTag01_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGATTCAAGGGCAGCCGCGTCACGATTGGATACGACTGTTGGACCGG
>SampleTag02_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTGGATGGGATAAGTGCGTGATGGACCGAAGGGACCTCGTGGCCGG
>SampleTag03_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCGGCTCGTGCTGCGTCGTCTCAAGTCCAGAAACTCCGTGTATCCT
>SampleTag04_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGATTGGGAGGCTTTCGTACCGCTGCCGCCACCAGGTGATACCCGCT
>SampleTag05_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCTCCCTGGTGTTCAATACCCGATGTGGTGGGCAGAATGTGGCTGG
>SampleTag06_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTTACCCGCAGGAAGACGTATACCCCTCGTGCCAGGCGACCAATGC
>SampleTag07_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTGTCTACGTCGGACCGCAAGAAGTGAGTCAGAGGCTGCACGCTGT
>SampleTag08_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCCCCACCAGGTTGCTTTGTCGGACGAGCCCGCACAGCGCTAGGAT
>SampleTag09_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGTGATCCGCGCAGGCACACATACCGACTCAGATGGGTTGTCCAGG
>SampleTag10_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGCAGCCGGCGTCGTACGAGGCACAGCGGAGACTAGATGAGGCCCC
>SampleTag11_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCGCGTCCAATTTCCGAAGCCCCGCCCTAGGAGTTCCCCTGCGTGC
>SampleTag12_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGCCCATTCATTGCACCCGCCAGTGATCGACCCTAGTGGAGCTAAG

View File

@@ -0,0 +1,141 @@
#!/bin/bash

# Abort on the first failing command, unset variable or failing pipeline
# stage, so that a failed download or subsetting step cannot silently leave
# empty or truncated files in the output directory.
set -euo pipefail

# scratch directory for the downloaded reference files (removed on exit)
TMP_DIR=/tmp/bd_rhapsody_make_reference
# directory the generated files are written to (relative to the working directory)
OUT_DIR=src/bd_rhapsody/test_data

# check if seqkit is installed
if ! command -v seqkit &> /dev/null; then
  echo "seqkit could not be found" >&2
  exit 1
fi

# create temporary directory and clean up on exit
mkdir -p "$TMP_DIR"

# Remove the temporary directory and everything downloaded into it.
function clean_up {
  rm -rf "$TMP_DIR"
}
trap clean_up EXIT
# fetch the reference genome and its annotation, unless a copy is already
# present in the temporary directory
ORIG_FA="$TMP_DIR/reference.fa.gz"
if [[ ! -f "$ORIG_FA" ]]; then
  wget -O "$ORIG_FA" \
    https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz
fi

ORIG_GTF="$TMP_DIR/reference.gtf.gz"
if [[ ! -f "$ORIG_GTF" ]]; then
  wget -O "$ORIG_GTF" \
    https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz
fi

# region kept in the small reference
CHR=chr1
START=30000
END=31500

# extract the sequence of the region from the selected chromosome
seqkit grep -r -p "^$CHR\$" "$ORIG_FA" \
  | seqkit subseq -r "$START:$END" \
  > "$OUT_DIR/reference_small.fa"

# keep the annotation records lying entirely inside the region and shift
# their coordinates so that the region starts at position 1
zcat "$ORIG_GTF" \
  | awk -v FS='\t' -v OFS='\t' -v chr="$CHR" -v start="$START" -v end="$END" '
      $1 == chr && $4 >= start && $5 <= end {
        $4 = $4 - start + 1;
        $5 = $5 - start + 1;
        print;
      }' \
  > "$OUT_DIR/reference_small.gtf"
# write the BD AbSeq Immune Discovery Panel FASTA (embedded below rather than downloaded)
# note: the sequences were contained in http://bd-rhapsody-public.s3.amazonaws.com/Rhapsody-Demo-Data-Inputs/12WTA-ABC-SMK-EB-5kJRT.tar
cat > $OUT_DIR/BDAbSeq_ImmuneDiscoveryPanel.fasta <<EOF
>CD11c:B-LY6|ITGAX|AHS0056|pAbO Catalog_940024
ATGCGTTGCGAGAGATATGCGTAGGTTGCTGATTGG
>CD14:MPHIP9|CD14|AHS0037|pAbO Catalog_940005
TGGCCCGTGGTAGCGCAATGTGAGATCGTAATAAGT
>CXCR5|CXCR5|AHS0039|pAbO Catalog_940042
AGGAAGGTCGATTGTATAACGCGGCATTGTAACGGC
>CD19:SJ25C1|CD19|AHS0030|pAbO Catalog_940004
TAGTAATGTGTTCGTAGCCGGTAATAATCTTCGTGG
>CD25:2A3|IL2RA|AHS0026|pAbO Catalog_940009
AGTTGTATGGGTTAGCCGAGAGTAGTGCGTATGATT
>CD27:M-T271|CD27|AHS0025|pAbO Catalog_940018
TGTCCGGTTTAGCGAATTGGGTTGAGTCACGTAGGT
>CD278|ICOS|AHS0012|pAbO Catalog_940043
ATAGTCCGCCGTAATCGTTGTGTCGCTGAAAGGGTT
>CD279:EH12-1|PDCD1|AHS0014|pAbO Catalog_940015
ATGGTAGTATCACGACGTAGTAGGGTAATTGGCAGT
>CD3:UCHT1|CD3E|AHS0231|pAbO Catalog_940307
AGCTAGGTGTTATCGGCAAGTTGTACGGTGAAGTCG
>GITR|TNFRSF18|AHS0104|pAbO Catalog_940096
TCTGTGTGTCGGGTTGAATCGTAGTGAGTTAGCGTG
>Tim3|HAVCR2|AHS0016|pAbO Catalog_940066
TAGGTAGTAGTCCCGTATATCCGATCCGTGTTGTTT
>CD4:SK3|CD4|AHS0032|pAbO Catalog_940001
TCGGTGTTATGAGTAGGTCGTCGTGCGGTTTGATGT
>CD45RA:HI100|PTPRC|AHS0009|pAbO Catalog_940011
AAGCGATTGCGAAGGGTTAGTCAGTACGTTATGTTG
>CD56:NCAM16.2|NCAM1|AHS0019|pAbO Catalog_940007
AGAGGTTGAGTCGTAATAATAATCGGAAGGCGTTGG
>CD62L:DREG-56|SELL|AHS0049|pAbO Catalog_940041
ATGGTAAATATGGGCGAATGCGGGTTGTGCTAAAGT
>CCR7|CCR7|AHS0273|pAbO Catalog_940394
AATGTGTGATCGGCAAAGGGTTCTCGGGTTAATATG
>CXCR6|CXCR6|AHS0148|pAbO Catalog_940234
GTGGTTGGTTATTCGGACGGTTCTATTGTGAGCGCT
>CD127|IL7R|AHS0028|pAbO Catalog_940012
AGTTATTAGGCTCGTAGGTATGTTTAGGTTATCGCG
>CD134:ACT35|TNFRSF4|AHS0013|pAbO Catalog_940060
GGTGTTGGTAAGACGGACGGAGTAGATATTCGAGGT
>CD28:L293|CD28|AHS0138|pAbO Catalog_940226
TTGTTGAGGATACGATGAAGCGGTTTAAGGGTGTGG
>CD272|BTLA|AHS0052|pAbO Catalog_940105
GTAGGTTGATAGTCGGCGATAGTGCGGTTGAAAGCT
>CD8:SK1|CD8A|AHS0228|pAbO Catalog_940305
AGGACATAGAGTAGGACGAGGTAGGCTTAAATTGCT
>HLA-DR|CD74|AHS0035|pAbO Catalog_940010
TGTTGGTTATTCGTTAGTGCATCCGTTTGGGCGTGG
>CD16:3G8|FCGR3A|AHS0053|pAbO Catalog_940006
TAAATCTAATCGCGGTAACATAACGGTGGGTAAGGT
>CD183|CXCR3|AHS0031|pAbO Catalog_940030
AAAGTGTTGGCGTTATGTGTTCGTTAGCGGTGTGGG
>CD196|CCR6|AHS0034|pAbO Catalog_940033
ACGTGTTATGGTGTTGTTCGAATTGTGGTAGTCAGT
>CD137|TNFRSF9|AHS0003|pAbO Catalog_940055
TGACAAGCAACGAGCGATACGAAAGGCGAAATTAGT
>CD161:HP-3G10|KLRB1|AHS0205|pAbO Catalog_940283
TTTAGGACGATTAGTTGTGCGGCATAGGAGGTGTTC
>IgM|IGHM|AHS0198|pAbO Catalog_940276
TTTGGAGGGTAGCTAGTTGCAGTTCGTGGTCGTTTC
>IgD|IGHD|AHS0058|pAbO Catalog_940026
TGAGGGATGTATAGCGAGAATTGCGACCGTAGACTT
EOF
# this was obtained by running the command:
# docker run bdgenomics/rhapsody:2.2.1 cat /rhapsody/control_files/SampleTagSequences_HomoSapiens_ver1.fasta
cat > $OUT_DIR/SampleTagSequences_HomoSapiens_ver1.fasta <<EOF
>SampleTag01_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGATTCAAGGGCAGCCGCGTCACGATTGGATACGACTGTTGGACCGG
>SampleTag02_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTGGATGGGATAAGTGCGTGATGGACCGAAGGGACCTCGTGGCCGG
>SampleTag03_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCGGCTCGTGCTGCGTCGTCTCAAGTCCAGAAACTCCGTGTATCCT
>SampleTag04_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGATTGGGAGGCTTTCGTACCGCTGCCGCCACCAGGTGATACCCGCT
>SampleTag05_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCTCCCTGGTGTTCAATACCCGATGTGGTGGGCAGAATGTGGCTGG
>SampleTag06_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTTACCCGCAGGAAGACGTATACCCCTCGTGCCAGGCGACCAATGC
>SampleTag07_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGTGTCTACGTCGGACCGCAAGAAGTGAGTCAGAGGCTGCACGCTGT
>SampleTag08_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCCCCACCAGGTTGCTTTGTCGGACGAGCCCGCACAGCGCTAGGAT
>SampleTag09_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGTGATCCGCGCAGGCACACATACCGACTCAGATGGGTTGTCCAGG
>SampleTag10_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGCAGCCGGCGTCGTACGAGGCACAGCGGAGACTAGATGAGGCCCC
>SampleTag11_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGCGCGTCCAATTTCCGAAGCCCCGCCCTAGGAGTTCCCCTGCGTGC
>SampleTag12_hs|stAbO
GTTGTCAAGATGCTACCGTTCAGAGGCCCATTCATTGCACCCGCCAGTGATCGACCCTAGTGGAGCTAAG
EOF

View File

@@ -235,9 +235,9 @@ build_info:
output: "target/executable/agat/agat_convert_bed2gff"
executable: "target/executable/agat/agat_convert_bed2gff/agat_convert_bed2gff"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -515,9 +515,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_bed2gff"
LABEL org.opencontainers.image.created="2024-09-17T06:47:34Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:57Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -225,9 +225,9 @@ build_info:
output: "target/executable/agat/agat_convert_embl2gff"
executable: "target/executable/agat/agat_convert_embl2gff/agat_convert_embl2gff"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -505,9 +505,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_embl2gff"
LABEL org.opencontainers.image.created="2024-09-17T06:47:33Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:56Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -230,9 +230,9 @@ build_info:
output: "target/executable/agat/agat_convert_genscan2gff"
executable: "target/executable/agat/agat_convert_genscan2gff/agat_convert_genscan2gff"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -514,9 +514,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_genscan2gff"
LABEL org.opencontainers.image.created="2024-09-17T06:47:34Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:57Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -228,9 +228,9 @@ build_info:
output: "target/executable/agat/agat_convert_sp_gff2gtf"
executable: "target/executable/agat/agat_convert_sp_gff2gtf/agat_convert_sp_gff2gtf"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -519,9 +519,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gff2gtf"
LABEL org.opencontainers.image.created="2024-09-17T06:47:34Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:57Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -188,9 +188,9 @@ build_info:
output: "target/executable/agat/agat_convert_sp_gff2tsv"
executable: "target/executable/agat/agat_convert_sp_gff2tsv/agat_convert_sp_gff2tsv"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -484,9 +484,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gff2tsv"
LABEL org.opencontainers.image.created="2024-09-17T06:47:33Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:56Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -195,9 +195,9 @@ build_info:
output: "target/executable/agat/agat_convert_sp_gxf2gxf"
executable: "target/executable/agat/agat_convert_sp_gxf2gxf/agat_convert_sp_gxf2gxf"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -493,9 +493,9 @@ RUN agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.t
LABEL org.opencontainers.image.authors="Leïla Paquay"
LABEL org.opencontainers.image.description="Companion container for running component agat agat_convert_sp_gxf2gxf"
LABEL org.opencontainers.image.created="2024-09-17T06:47:33Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:56Z"
LABEL org.opencontainers.image.source="https://github.com/NBISweden/AGAT"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -706,9 +706,9 @@ build_info:
output: "target/executable/arriba"
executable: "target/executable/arriba/arriba"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -754,9 +754,9 @@ RUN arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s\(.*\)/arriba: "\1"/' >
LABEL org.opencontainers.image.authors="Robrecht Cannoodt"
LABEL org.opencontainers.image.description="Companion container for running component arriba"
LABEL org.opencontainers.image.created="2024-09-17T06:47:36Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:59Z"
LABEL org.opencontainers.image.source="https://github.com/suhrig/arriba"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -469,9 +469,9 @@ build_info:
output: "target/executable/bcftools/bcftools_annotate"
executable: "target/executable/bcftools/bcftools_annotate/bcftools_annotate"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -650,9 +650,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_annotate"
LABEL org.opencontainers.image.created="2024-09-17T06:47:28Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:50Z"
LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -335,9 +335,9 @@ build_info:
output: "target/executable/bcftools/bcftools_concat"
executable: "target/executable/bcftools/bcftools_concat/bcftools_concat"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -566,9 +566,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_concat"
LABEL org.opencontainers.image.created="2024-09-17T06:47:28Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:51Z"
LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -416,9 +416,9 @@ build_info:
output: "target/executable/bcftools/bcftools_norm"
executable: "target/executable/bcftools/bcftools_norm/bcftools_norm"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -589,9 +589,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_norm"
LABEL org.opencontainers.image.created="2024-09-17T06:47:29Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:51Z"
LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -185,9 +185,9 @@ build_info:
output: "target/executable/bcftools/bcftools_sort"
executable: "target/executable/bcftools/bcftools_sort/bcftools_sort"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -483,9 +483,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_sort"
LABEL org.opencontainers.image.created="2024-09-17T06:47:27Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:50Z"
LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -458,9 +458,9 @@ build_info:
output: "target/executable/bcftools/bcftools_stats"
executable: "target/executable/bcftools/bcftools_stats/bcftools_stats"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -626,9 +626,9 @@ RUN echo "bcftools: \"$(bcftools --version | grep 'bcftools' | sed -n 's/^bcftoo
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bcftools bcftools_stats"
LABEL org.opencontainers.image.created="2024-09-17T06:47:28Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:51Z"
LABEL org.opencontainers.image.source="https://github.com/samtools/bcftools"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -418,9 +418,9 @@ build_info:
output: "target/executable/bcl_convert"
executable: "target/executable/bcl_convert/bcl_convert"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -599,9 +599,9 @@ RUN echo "bcl-convert: \"$(bcl-convert -V 2>&1 >/dev/null | sed -n '/Version/ s/
LABEL org.opencontainers.image.authors="Toni Verbeiren, Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component bcl_convert"
LABEL org.opencontainers.image.created="2024-09-17T06:47:35Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:58Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -146,8 +146,6 @@ resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "make_rhap_reference_2.2.1_nodocker.cwl"
description: "The Reference Files Generator creates an archive containing Genome Index\n\
and Transcriptome annotation files needed for the BD Rhapsody Sequencing\nAnalysis\
\ Pipeline. The app takes as input one or more FASTA and GTF files\nand produces\
@@ -249,6 +247,7 @@ engines:
- type: "apt"
packages:
- "procps"
- "git"
interactive: false
- type: "python"
user: false
@@ -258,7 +257,12 @@ engines:
upgrade: true
- type: "docker"
run:
- "echo \"bdgenomics/rhapsody: 2.2.1\" > /var/software_versions.txt\n"
- "mkdir /var/bd_rhapsody_cwl && \\\n cd /var/bd_rhapsody_cwl && \\\n git clone\
\ https://bitbucket.org/CRSwDev/cwl.git . && \\\n git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de\n"
- type: "docker"
  run:
  # VERSION has to be computed and used within a single entry: every entry is
  # rendered as its own Dockerfile RUN instruction, and a shell variable set
  # in one RUN is no longer defined in the next.
  - "VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1) && echo \"bdgenomics/rhapsody: \\\"$VERSION\\\"\" > /var/software_versions.txt"
entrypoint: []
cmd: null
- type: "native"
@@ -270,9 +274,9 @@ build_info:
output: "target/executable/bd_rhapsody/bd_rhapsody_make_reference"
executable: "target/executable/bd_rhapsody/bd_rhapsody_make_reference/bd_rhapsody_make_reference"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -539,19 +539,24 @@ function ViashDockerfile {
FROM bdgenomics/rhapsody:2.2.1
ENTRYPOINT []
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "cwlref-runner" "cwl-runner"
RUN echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt
RUN mkdir /var/bd_rhapsody_cwl && \
cd /var/bd_rhapsody_cwl && \
git clone https://bitbucket.org/CRSwDev/cwl.git . && \
git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de
# Compute and use VERSION within one RUN instruction: each RUN starts a fresh
# shell, so a variable assigned in a previous RUN would be empty here.
RUN VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1) && \
  echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Robrecht Cannoodt, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component bd_rhapsody bd_rhapsody_make_reference"
LABEL org.opencontainers.image.created="2024-09-17T06:47:38Z"
LABEL org.opencontainers.image.created="2024-09-17T09:53:02Z"
LABEL org.opencontainers.image.source="https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -1360,21 +1365,21 @@ def generate_config(par: dict[str, Any], meta, config) -> str:
for config_key, arg_type, par_value in config_key_value_pairs:
if arg_type == "file":
str = strip_margin(f"""\\
content = strip_margin(f"""\\
|{config_key}:
|""")
if isinstance(par_value, list):
for file in par_value:
str += strip_margin(f"""\\
content += strip_margin(f"""\\
| - class: File
| location: "{file}"
|""")
else:
str += strip_margin(f"""\\
content += strip_margin(f"""\\
| class: File
| location: "{par_value}"
|""")
content_list.append(str)
content_list.append(content)
else:
content_list.append(strip_margin(f"""\\
|{config_key}: {par_value}
@@ -1385,9 +1390,9 @@ def generate_config(par: dict[str, Any], meta, config) -> str:
def get_cwl_file(meta: dict[str, Any]) -> str:
# create cwl file (if need be)
cwl_file=os.path.join(meta["resources_dir"], "make_rhap_reference_2.2.1_nodocker.cwl")
cwl_file="/var/bd_rhapsody_cwl/v2.2.1/Extra_Utilities/make_rhap_reference_2.2.1.cwl"
return cwl_file
return os.path.abspath(cwl_file)
def main(par: dict[str, Any], meta: dict[str, Any]):
config = read_config(meta["config"])

View File

@@ -1,115 +0,0 @@
requirements:
InlineJavascriptRequirement: {}
class: CommandLineTool
label: Reference Files Generator for BD Rhapsody™ Sequencing Analysis Pipeline
cwlVersion: v1.2
doc: >-
The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsody™ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n - STAR index\n - Filtered GTF file
baseCommand: run_reference_generator.sh
inputs:
Genome_fasta:
type: File[]
label: Reference Genome
doc: |-
Reference genome file in FASTA format. The BD Rhapsody™ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse.
inputBinding:
prefix: --reference-genome
shellQuote: false
Gtf:
type: File[]
label: Transcript Annotations
doc: |-
Transcript annotation files in GTF format. The BD Rhapsody™ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse.
inputBinding:
prefix: --gtf
shellQuote: false
Extra_sequences:
type: File[]?
label: Extra Sequences
doc: |-
Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome)
inputBinding:
prefix: --extra-sequences
shellQuote: false
Mitochondrial_Contigs:
type: string[]?
default: ["chrM", "chrMT", "M", "MT"]
label: Mitochondrial Contig Names
doc: |-
Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline.
inputBinding:
prefix: --mitochondrial-contigs
shellQuote: false
Filtering_off:
type: boolean?
label: Turn off filtering
doc: |-
By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are kept:
- protein_coding
- lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97)
- IG_LV_gene
- IG_V_gene
- IG_V_pseudogene
- IG_D_gene
- IG_J_gene
- IG_J_pseudogene
- IG_C_gene
- IG_C_pseudogene
- TR_V_gene
- TR_V_pseudogene
- TR_D_gene
- TR_J_gene
- TR_J_pseudogene
- TR_C_gene
If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True.
inputBinding:
prefix: --filtering-off
shellQuote: false
WTA_Only:
type: boolean?
label: WTA only index
doc: Build a WTA only index, otherwise builds a WTA + ATAC index.
inputBinding:
prefix: --wta-only-index
shellQuote: false
Archive_prefix:
type: string?
label: Archive Prefix
doc: |-
A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files.
inputBinding:
prefix: --archive-prefix
shellQuote: false
Extra_STAR_params:
type: string?
label: Extra STAR Params
doc: |-
Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
Example:
--limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11
inputBinding:
prefix: --extra-star-params
shellQuote: true
Maximum_threads:
type: int?
label: Maximum Number of Threads
doc: |-
The maximum number of threads to use in the pipeline. By default, all available cores are used.
inputBinding:
prefix: --maximum-threads
shellQuote: false
outputs:
Archive:
type: File
doc: |-
A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsody™ Sequencing Analysis Pipeline.
id: Reference_Archive
label: Reference Files Archive
outputBinding:
glob: '*.tar.gz'

File diff suppressed because it is too large Load Diff

View File

@@ -187,9 +187,9 @@ build_info:
output: "target/executable/bedtools/bedtools_bamtofastq"
executable: "target/executable/bedtools/bedtools_bamtofastq/bedtools_bamtofastq"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -483,9 +483,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bamtofastq"
LABEL org.opencontainers.image.created="2024-09-17T06:47:20Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:43Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -176,9 +176,9 @@ build_info:
output: "target/executable/bedtools/bedtools_bed12tobed6"
executable: "target/executable/bedtools/bedtools_bed12tobed6/bedtools_bed12tobed6"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -480,9 +480,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bed12tobed6"
LABEL org.opencontainers.image.created="2024-09-17T06:47:19Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:42Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -214,9 +214,9 @@ build_info:
output: "target/executable/bedtools/bedtools_bedtobam"
executable: "target/executable/bedtools/bedtools_bedtobam/bedtools_bedtobam"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -496,9 +496,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_bedtobam"
LABEL org.opencontainers.image.created="2024-09-17T06:47:21Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:44Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -337,9 +337,9 @@ build_info:
output: "target/executable/bedtools/bedtools_genomecov"
executable: "target/executable/bedtools/bedtools_genomecov/bedtools_genomecov"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -591,9 +591,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_genomecov"
LABEL org.opencontainers.image.created="2024-09-17T06:47:19Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:41Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -232,9 +232,9 @@ build_info:
output: "target/executable/bedtools/bedtools_getfasta"
executable: "target/executable/bedtools/bedtools_getfasta/bedtools_getfasta"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -526,9 +526,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_getfasta"
LABEL org.opencontainers.image.created="2024-09-17T06:47:19Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:42Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -273,9 +273,9 @@ build_info:
output: "target/executable/bedtools/bedtools_groupby"
executable: "target/executable/bedtools/bedtools_groupby/bedtools_groupby"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -552,9 +552,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_groupby"
LABEL org.opencontainers.image.created="2024-09-17T06:47:21Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:43Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -410,9 +410,9 @@ build_info:
output: "target/executable/bedtools/bedtools_intersect"
executable: "target/executable/bedtools/bedtools_intersect/bedtools_intersect"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -633,9 +633,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_intersect"
LABEL org.opencontainers.image.created="2024-09-17T06:47:20Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:42Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -210,9 +210,9 @@ build_info:
output: "target/executable/bedtools/bedtools_links"
executable: "target/executable/bedtools/bedtools_links/bedtools_links"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -500,9 +500,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_links"
LABEL org.opencontainers.image.created="2024-09-17T06:47:20Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:43Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -279,9 +279,9 @@ build_info:
output: "target/executable/bedtools/bedtools_merge"
executable: "target/executable/bedtools/bedtools_merge/bedtools_merge"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -558,9 +558,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_merge"
LABEL org.opencontainers.image.created="2024-09-17T06:47:18Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:41Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -222,9 +222,9 @@ build_info:
output: "target/executable/bedtools/bedtools_sort"
executable: "target/executable/bedtools/bedtools_sort/bedtools_sort"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -509,9 +509,9 @@ RUN echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component bedtools bedtools_sort"
LABEL org.opencontainers.image.created="2024-09-17T06:47:21Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:44Z"
LABEL org.opencontainers.image.source="https://github.com/arq5x/bedtools2"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -158,9 +158,9 @@ build_info:
output: "target/executable/busco/busco_download_datasets"
executable: "target/executable/busco/busco_download_datasets/busco_download_datasets"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -475,9 +475,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component busco busco_download_datasets"
LABEL org.opencontainers.image.created="2024-09-17T06:47:31Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:54Z"
LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -145,9 +145,9 @@ build_info:
output: "target/executable/busco/busco_list_datasets"
executable: "target/executable/busco/busco_list_datasets/busco_list_datasets"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -465,9 +465,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component busco busco_list_datasets"
LABEL org.opencontainers.image.created="2024-09-17T06:47:32Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:54Z"
LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -423,9 +423,9 @@ build_info:
output: "target/executable/busco/busco_run"
executable: "target/executable/busco/busco_run/busco_run"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -632,9 +632,9 @@ RUN busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_version
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component busco busco_run"
LABEL org.opencontainers.image.created="2024-09-17T06:47:32Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:55Z"
LABEL org.opencontainers.image.source="https://gitlab.com/ezlab/busco"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -740,9 +740,9 @@ build_info:
output: "target/executable/cutadapt"
executable: "target/executable/cutadapt/cutadapt"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -831,9 +831,9 @@ RUN cutadapt --version | sed 's/\(.*\)/cutadapt: "\1"/' > /var/software_versions
LABEL org.opencontainers.image.authors="Toni Verbeiren"
LABEL org.opencontainers.image.description="Companion container for running component cutadapt"
LABEL org.opencontainers.image.created="2024-09-17T06:47:37Z"
LABEL org.opencontainers.image.created="2024-09-17T09:53:00Z"
LABEL org.opencontainers.image.source="https://github.com/marcelm/cutadapt"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -317,9 +317,9 @@ build_info:
output: "target/executable/falco"
executable: "target/executable/falco/falco"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -589,9 +589,9 @@ RUN echo "falco: \"$(falco -v | sed -n 's/^falco //p')\"" > /var/software_versio
LABEL org.opencontainers.image.authors="Toni Verbeiren"
LABEL org.opencontainers.image.description="Companion container for running component falco"
LABEL org.opencontainers.image.created="2024-09-17T06:47:38Z"
LABEL org.opencontainers.image.created="2024-09-17T09:53:01Z"
LABEL org.opencontainers.image.source="https://github.com/smithlabcode/falco"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -1083,9 +1083,9 @@ build_info:
output: "target/executable/fastp"
executable: "target/executable/fastp/fastp"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -1028,9 +1028,9 @@ RUN fastp --version 2>&1 | sed 's# #: "#;s#$#"#' > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Robrecht Cannoodt"
LABEL org.opencontainers.image.description="Companion container for running component fastp"
LABEL org.opencontainers.image.created="2024-09-17T06:47:35Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:58Z"
LABEL org.opencontainers.image.source="https://github.com/OpenGene/fastp"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -340,9 +340,9 @@ build_info:
output: "target/executable/fastqc"
executable: "target/executable/fastqc/fastqc"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -601,9 +601,9 @@ RUN echo "fastqc: $(fastqc --version | sed -n 's/^FastQC //p')" > /var/software_
LABEL org.opencontainers.image.authors="Theodoro Gasperin Terra Camargo"
LABEL org.opencontainers.image.description="Companion container for running component fastqc"
LABEL org.opencontainers.image.created="2024-09-17T06:47:26Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:49Z"
LABEL org.opencontainers.image.source="https://github.com/s-andrews/FastQC"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -645,9 +645,9 @@ build_info:
output: "target/executable/featurecounts"
executable: "target/executable/featurecounts/featurecounts"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -754,9 +754,9 @@ RUN featureCounts -v 2>&1 | sed 's/featureCounts v\([0-9.]*\)/featureCounts: \1/
LABEL org.opencontainers.image.authors="Sai Nirmayi Yasa"
LABEL org.opencontainers.image.description="Companion container for running component featurecounts"
LABEL org.opencontainers.image.created="2024-09-17T06:47:30Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:52Z"
LABEL org.opencontainers.image.source="https://github.com/ShiLab-Bioinformatics/subread"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -190,9 +190,9 @@ build_info:
output: "target/executable/fq_subsample"
executable: "target/executable/fq_subsample/fq_subsample"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -493,9 +493,9 @@ mv target/release/fq /usr/local/bin/ && \
cd / && rm -rf /fq
LABEL org.opencontainers.image.description="Companion container for running component fq_subsample"
LABEL org.opencontainers.image.created="2024-09-17T06:47:25Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:48Z"
LABEL org.opencontainers.image.source="https://github.com/stjude-rust-labs/fq"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -685,9 +685,9 @@ build_info:
output: "target/executable/gffread"
executable: "target/executable/gffread/gffread"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -807,9 +807,9 @@ RUN echo "gffread: \"$(gffread --version 2>&1)\"" > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Emma Rousseau"
LABEL org.opencontainers.image.description="Companion container for running component gffread"
LABEL org.opencontainers.image.created="2024-09-17T06:47:18Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:40Z"
LABEL org.opencontainers.image.source="https://github.com/gpertea/gffread"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -218,9 +218,9 @@ build_info:
output: "target/executable/kallisto/kallisto_index"
executable: "target/executable/kallisto/kallisto_index/kallisto_index"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -506,9 +506,9 @@ tar -xzf kallisto_linux-v0.50.1.tar.gz && \
mv kallisto/kallisto /usr/local/bin/
LABEL org.opencontainers.image.description="Companion container for running component kallisto kallisto_index"
LABEL org.opencontainers.image.created="2024-09-17T06:47:33Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:55Z"
LABEL org.opencontainers.image.source="https://github.com/pachterlab/kallisto"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -507,9 +507,9 @@ build_info:
output: "target/executable/lofreq/lofreq_call"
executable: "target/executable/lofreq/lofreq_call/lofreq_call"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -656,9 +656,9 @@ echo "lofreq: $version" > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Kai Waldrant"
LABEL org.opencontainers.image.description="Companion container for running component lofreq lofreq_call"
LABEL org.opencontainers.image.created="2024-09-17T06:47:29Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:52Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -215,9 +215,9 @@ build_info:
output: "target/executable/lofreq/lofreq_indelqual"
executable: "target/executable/lofreq/lofreq_indelqual/lofreq_indelqual"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -501,9 +501,9 @@ echo "lofreq: $version" > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Kai Waldrant"
LABEL org.opencontainers.image.description="Companion container for running component lofreq lofreq_indelqual"
LABEL org.opencontainers.image.created="2024-09-17T06:47:29Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:52Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -456,9 +456,9 @@ build_info:
output: "target/executable/multiqc"
executable: "target/executable/multiqc/multiqc"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -637,9 +637,9 @@ RUN multiqc --version | sed 's/multiqc, version\s\(.*\)/multiqc: "\1"/' > /var/s
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component multiqc"
LABEL org.opencontainers.image.created="2024-09-17T06:47:38Z"
LABEL org.opencontainers.image.created="2024-09-17T09:53:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/biobox"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -398,9 +398,9 @@ build_info:
output: "target/executable/pear"
executable: "target/executable/pear/pear"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -597,9 +597,9 @@ echo "pear: $version" > /var/software_versions.txt
LABEL org.opencontainers.image.authors="Kai Waldrant"
LABEL org.opencontainers.image.description="Companion container for running component pear"
LABEL org.opencontainers.image.created="2024-09-17T06:47:21Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:44Z"
LABEL org.opencontainers.image.source="https://github.com/tseemann/PEAR"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -264,9 +264,9 @@ build_info:
output: "target/executable/qualimap/qualimap_rnaseq"
executable: "target/executable/qualimap/qualimap_rnaseq/qualimap_rnaseq"
viash_version: "0.9.0"
git_commit: "38f635ad57ef05550bba3a0864c81627f84f5ad2"
git_remote: "https://x-access-token:ghs_wFnCgH6LWyegjU3sAMvKhUKRQxXHxx2DXwds@github.com/viash-hub/biobox"
git_tag: "v0.2.0-2-g38f635a"
git_commit: "7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
git_remote: "https://x-access-token:ghs_aSDBedV4vU66pddFDN6d8UEy0ZQApn08RAsh@github.com/viash-hub/biobox"
git_tag: "v0.2.0-3-g7f8bcc2"
package_config:
name: "biobox"
version: "main"

View File

@@ -527,9 +527,9 @@ RUN echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /v
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component qualimap qualimap_rnaseq"
LABEL org.opencontainers.image.created="2024-09-17T06:47:25Z"
LABEL org.opencontainers.image.created="2024-09-17T09:52:48Z"
LABEL org.opencontainers.image.source="https://bitbucket.org/kokonech/qualimap/commits/branch/master"
LABEL org.opencontainers.image.revision="38f635ad57ef05550bba3a0864c81627f84f5ad2"
LABEL org.opencontainers.image.revision="7f8bcc2b3e1ffaac9778b6acb42420b19660d1a1"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

Some files were not shown because too many files have changed in this diff Show More