Build branch fix-integration-tests with version fix-integration-tests (da62b4ff)
Build pipeline: vsh-ci-dev-gckj5
Source commit: da62b4ffe3
Source message: Add labels to qc_test component
This commit is contained in:
43
CHANGELOG.md
43
CHANGELOG.md
@@ -1,4 +1,4 @@
|
||||
# openpipelines x.x.x
|
||||
# openpipelines 2.x.x (Unreleased)
|
||||
|
||||
## BREAKING CHANGES
|
||||
|
||||
@@ -42,8 +42,6 @@
|
||||
- Store label probabilities instead of uncertainties
|
||||
- Take `.h5mu` format as an input instead of `.h5ad`
|
||||
|
||||
* `labels_transfer/knn`: delete outdated component due to its functionality now implemented in `labels_transfer/pynndescent_knn`
|
||||
|
||||
* `reference/build_cellranger_arc_reference`: a default value of "output" is now specified for the argument `--genome`, in line with the `reference/build_cellranger_reference` component. Additionally, providing a value for `--organism` is no longer required and its default value of `Homo Sapiens` has been removed (PR #864).
|
||||
|
||||
## NEW FUNCTIONALITY
|
||||
@@ -91,8 +89,6 @@
|
||||
|
||||
* `dataflow/split_h5mu` component: Added a component to split a single h5mu file into multiple h5mu files based on the values of an .obs column (PR #824).
|
||||
|
||||
* `labels_transfer/pynndescent_knn`: component: Added a component for KNN classification based on a PyNNDescent neighborhood graph (PR #830).
|
||||
|
||||
* `workflows/test_workflows/ingestion` components & `workflows/ingestion`: Added standalone components for integration testing of ingestion workflows (PR #801).
|
||||
|
||||
* `workflows/ingestion/make_reference`: Add additional arguments passed through to the STAR and BD Rhapsody reference components (PR #846).
|
||||
@@ -103,7 +99,7 @@
|
||||
|
||||
* `dimred/densmap` component: Added a densMAP dimensionality reduction component (PR #748).
|
||||
|
||||
* `annotete/scanvi` component: Added a component to annotate cells using scANVI (PR #833).
|
||||
* `annotate/scanvi` component: Added a component to annotate cells using scANVI (PR #833).
|
||||
|
||||
* `transform/bpcells_regress_out` component: Added a component to regress out effects of confounding variables in the count matrix using BPCells (PR #863).
|
||||
|
||||
@@ -129,6 +125,10 @@
|
||||
|
||||
* `metadata/duplicate_var` component: Added a component to make a copy from one .var field or index to another .var field within the same MuData object (PR #877).
|
||||
|
||||
* `filter/subset_obsp` component: Added a component to subset an .obsp matrix by column based on the value of an .obs field. The resulting subset is stored in an .obsm field (PR #888).
|
||||
|
||||
* `labels_transfer/knn` component: Enable using additional distance functions for KNN classification (PR #830) and allow to perform KNN classification based on a pre-calculated neighborhood graph (PR #890).
|
||||
|
||||
## MINOR CHANGES
|
||||
|
||||
* `resources_test_scripts/cellranger_atac_tiny_bcl.sh` script: generate counts from fastq files using CellRanger atac count (PR #726).
|
||||
@@ -142,8 +142,6 @@
|
||||
|
||||
* Bump scvelo to `0.3.2` (PR #828).
|
||||
|
||||
* Bump viash to `0.8.6` (PR #815).
|
||||
|
||||
* Pin numpy<2 for several components (PR #815).
|
||||
|
||||
* Added `resources_test_scripts/cellranger_atac_tiny_bcl.sh` script: download tiny bcl file with an ATAC experiment, download a motifs file, demultiplex bcl files to reads in fastq format (PR #726).
|
||||
@@ -162,23 +160,38 @@
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
* `dataflow/concatenate_h5mu`: fix writing out multidimensional annotation dataframes (e.g. `.varm`) that had their
|
||||
data dtype (dtype) changed as a result of adding more observations after concatenation, causing `TypeError`.
|
||||
One notable example of this happening is when one of the samples does not have a multimodal annotation dataframe
|
||||
which is present in another sample; causing the values being filled with `NA` (PR #837).
|
||||
|
||||
* `qc/calculate_qc_metrics`: increase total counts accuracy with low precision floating dtypes as input layer (PR #852).
|
||||
|
||||
* Fix failing tests for `ingestion/cellranger_postprocessing`, `ingestion/conversion` and `multiomics/process_batches` (PR #869).
|
||||
|
||||
* `convert/from_10xh5_to_h5mu`: add .uns slot to mdata root when metrics file is provided (PR #887).
|
||||
|
||||
* Use `params.resources_test` in test workflows in order to point to an alternative location (e.g. a cache).
|
||||
* Fix ingestion components not working when optional arguments are unset (PR #894).
|
||||
|
||||
## DOCUMENTATION
|
||||
|
||||
* Update authorship of components (PR #835).
|
||||
|
||||
# openpipelines 1.0.3
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
* `qc/calculate_qc_metrics`: increase total counts accuracy with low precision floating dtypes as input layer (PR # , backported from PR #852).
|
||||
|
||||
# openpipelines 1.0.2
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
* `dataflow/concatenate_h5mu`: fix writing out multidimensional annotation dataframes (e.g. `.varm`) that had their
|
||||
data dtype (dtype) changed as a result of adding more observations after concatenation, causing `TypeError`.
|
||||
One notable example of this happening is when one of the samples does not have a multimodal annotation dataframe
|
||||
which is present in another sample; causing the values being filled with `NA` (PR #842, backported from PR #837).
|
||||
|
||||
# openpipelines 1.0.1
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
* Bump viash to `0.8.6` (PR #816, backported from #815). This changes the at-runtime generated nextflow process from an in-memory to an on-disk temporary file, which should cause fewer issues with Nextflow Fusion.
|
||||
|
||||
# openpipelines 1.0.0-rc6
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
@@ -23,27 +23,26 @@ argument_groups:
|
||||
type: string
|
||||
default: "rna"
|
||||
required: false
|
||||
- name: "--var_input_gene_names"
|
||||
description: .var field containing the gene names, if the .var index is not to be used.
|
||||
type: string
|
||||
required: false
|
||||
|
||||
- name: Reference
|
||||
description: Arguments related to the reference dataset.
|
||||
- name: Reference model
|
||||
description: Arguments related to the reference model.
|
||||
arguments:
|
||||
- name: "--reference"
|
||||
type: file
|
||||
description: Reference h5mu file.
|
||||
direction: input
|
||||
required: true
|
||||
example: reference.h5mu
|
||||
- name: "--scvi_reference_model"
|
||||
type: file
|
||||
description: "Pretrained scvi reference model"
|
||||
description: "Pretrained SCVI reference model to initialize the SCANVI model with. The saved model must include the AnnData object that was used to train it."
|
||||
example: scvi_model.pt
|
||||
direction: input
|
||||
required: true
|
||||
- name: "--reference_obs_label"
|
||||
type: string
|
||||
description: Key in obs field of reference AnnData with cell-type information.
|
||||
example: "cell_ontology_class"
|
||||
required: true
|
||||
required: false
|
||||
- name: "--scanvi_reference_model"
|
||||
type: file
|
||||
description: "Pretrained SCANVI reference model."
|
||||
example: scvi_model.pt
|
||||
direction: input
|
||||
required: false
|
||||
|
||||
- name: SCANVI reference model training arguments
|
||||
description: Arguments related to the reference SCANVI model.
|
||||
@@ -190,6 +189,7 @@ resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
- path: /src/utils/setup_logger.py
|
||||
- path: /src/annotate/utils/query_reference_allignment.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
|
||||
@@ -7,14 +7,39 @@ import numpy as np
|
||||
par = {
|
||||
"input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
|
||||
"modality": "rna",
|
||||
"reference": "resources_test/annotation_test_data/TS_Blood_filtered.h5ad",
|
||||
"scvi_reference_model": "resources_test/annotation_test_data/scvi_model.pt",
|
||||
"reference_obs_label": "cell_ontology_class",
|
||||
"var_query_gene_names": None,
|
||||
"scvi_reference_model": "resources_test/annotation_test_data/scvi_model",
|
||||
"scanvi_reference_model": None,
|
||||
"unknown_celltype": "Unkown",
|
||||
"output": "output.h5mu",
|
||||
"output_obsm_scanvi_embedding": "scanvi_embedding",
|
||||
"output_obs_predictions": "scanvi_pred",
|
||||
"output_obs_probability": "scanvi_probability",
|
||||
"output_model": None,
|
||||
"output_compression": None,
|
||||
"reference_learning_rate": 1e-3,
|
||||
"reference_reduce_lr_on_plateau": True,
|
||||
"reference_lr_patience": 25,
|
||||
"reference_lr_factor": 0.5,
|
||||
"reference_train_size": 0.9,
|
||||
"reference_max_epochs": 10,
|
||||
"reference_early_stopping": True,
|
||||
"reference_early_stopping_patience": 50,
|
||||
"query_train_size": 0.9,
|
||||
"query_max_epochs": 10,
|
||||
"query_learning_rate": 1e-3,
|
||||
"query_reduce_lr_on_plateau": True,
|
||||
"query_lr_patience": 25,
|
||||
"query_lr_factor": 0.5,
|
||||
"query_early_stopping": True,
|
||||
"query_early_stopping_patience": 50
|
||||
}
|
||||
meta = {}
|
||||
meta = {"resources_dir": "src/annotate/utils"}
|
||||
## VIASH END
|
||||
|
||||
sys.path.append(meta["resources_dir"])
|
||||
from query_reference_allignment import set_var_index, cross_check_genes
|
||||
|
||||
# START TEMPORARY WORKAROUND setup_logger
|
||||
# reason: resources aren't available when using Nextflow fusion
|
||||
# from setup_logger import setup_logger
|
||||
@@ -33,72 +58,104 @@ def setup_logger():
|
||||
# END TEMPORARY WORKAROUND setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
logger.info("Reading the input and reference data")
|
||||
if (not par["scvi_reference_model"]) and not (par["scanvi_reference_model"]) or (par["scvi_reference_model"] and par["scanvi_reference_model"]):
|
||||
raise ValueError("Make sure to provide either an '--scvi_reference_model' or a '--scanvi_reference_model', but not both.")
|
||||
|
||||
input_data = mu.read_h5mu(par["input"])
|
||||
query = input_data.mod[par["modality"]]
|
||||
reference_data = mu.read_h5mu(par["reference"])
|
||||
reference = reference_data.mod[par["modality"]]
|
||||
|
||||
logger.info(f"Loading the pretrained scVI model from {par['scvi_reference_model']}")
|
||||
scvi_reference_model = scvi.model.SCVI.load(par["scvi_reference_model"], reference)
|
||||
def main():
|
||||
logger.info("Reading the query data")
|
||||
# Read in data
|
||||
input_data = mu.read_h5mu(par["input"])
|
||||
input_modality = input_data.mod[par["modality"]].copy()
|
||||
# scANVI requires query and reference gene names to be equivalent
|
||||
input_modality = set_var_index(input_modality, par["var_input_gene_names"])
|
||||
|
||||
logger.info("Setting up scANVI model")
|
||||
if par["scanvi_reference_model"]:
|
||||
|
||||
scanvi_ref = scvi.model.SCANVI.from_scvi_model(
|
||||
scvi_reference_model,
|
||||
unlabeled_category=par["unknown_celltype"],
|
||||
labels_key=par["reference_obs_label"],
|
||||
logger.info(f"Loading the pretrained scANVI model from {par['scanvi_reference_model']} and updating it with the query data {par['input']}")
|
||||
scanvi_query = scvi.model.SCANVI.load_query_data(
|
||||
input_modality,
|
||||
par["scanvi_reference_model"],
|
||||
freeze_classifier=True,
|
||||
inplace_subset_query_vars=True
|
||||
)
|
||||
|
||||
reference_plan_kwargs = {"lr": par["reference_learning_rate"],
|
||||
elif par["scvi_reference_model"]:
|
||||
|
||||
logger.info("Reading in the reference model and associated reference data")
|
||||
scvi_reference_model = scvi.model.SCVI.load(par["scvi_reference_model"])
|
||||
reference = scvi_reference_model.adata
|
||||
|
||||
|
||||
logger.info("Alligning genes in reference and query dataset")
|
||||
# scANVI requires query and reference gene names to be equivalent
|
||||
reference = set_var_index(reference)
|
||||
# Subset query dataset based on genes present in reference
|
||||
common_ens_ids = cross_check_genes(input_modality, reference)
|
||||
input_modality = input_modality[:, common_ens_ids]
|
||||
|
||||
logger.info("Instantiating scANVI model from the scVI model")
|
||||
scanvi_ref = scvi.model.SCANVI.from_scvi_model(
|
||||
scvi_reference_model,
|
||||
unlabeled_category=par["unknown_celltype"],
|
||||
labels_key=scvi_reference_model.adata_manager._registry["setup_args"]["labels_key"],
|
||||
)
|
||||
|
||||
reference_plan_kwargs = {
|
||||
"lr": par["reference_learning_rate"],
|
||||
"reduce_lr_on_plateau": par['reference_reduce_lr_on_plateau'],
|
||||
"lr_patience": par['reference_lr_patience'],
|
||||
"lr_factor": par['reference_lr_factor']
|
||||
}
|
||||
|
||||
logger.info("Training scANVI model on reference data with celltype labels")
|
||||
logger.info("Training scANVI model on reference data with celltype labels")
|
||||
|
||||
scanvi_ref.train(
|
||||
scanvi_ref.train(
|
||||
train_size=par["reference_train_size"],
|
||||
max_epochs=par['reference_max_epochs'],
|
||||
early_stopping=par['reference_early_stopping'],
|
||||
early_stopping_patience=par['reference_early_stopping_patience'],
|
||||
plan_kwargs=reference_plan_kwargs,
|
||||
check_val_every_n_epoch=1,
|
||||
accelerator="auto",
|
||||
)
|
||||
accelerator="auto"
|
||||
)
|
||||
|
||||
logger.info("Updating and training scANVI model with query data")
|
||||
scvi.model.SCANVI.prepare_query_anndata(query, scanvi_ref, inplace=True)
|
||||
scanvi_query = scvi.model.SCANVI.load_query_data(query, scanvi_ref)
|
||||
logger.info(f"Updating scANVI model with query data {par['input']}")
|
||||
scvi.model.SCANVI.prepare_query_anndata(input_modality, scanvi_ref, inplace=True)
|
||||
scanvi_query = scvi.model.SCANVI.load_query_data(input_modality, scanvi_ref)
|
||||
|
||||
query_plan_kwargs = {"lr": par["query_learning_rate"],
|
||||
logger.info("Training scANVI model with query data")
|
||||
query_plan_kwargs = {
|
||||
"lr": par["query_learning_rate"],
|
||||
"reduce_lr_on_plateau": par['query_reduce_lr_on_plateau'],
|
||||
"lr_patience": par['query_lr_patience'],
|
||||
"lr_factor": par['query_lr_factor']
|
||||
}
|
||||
|
||||
scanvi_query.train(
|
||||
scanvi_query.train(
|
||||
train_size=par["query_train_size"],
|
||||
max_epochs=par['query_max_epochs'],
|
||||
early_stopping=par['query_early_stopping'],
|
||||
early_stopping_patience=par['query_early_stopping_patience'],
|
||||
plan_kwargs=query_plan_kwargs,
|
||||
check_val_every_n_epoch=1,
|
||||
accelerator="auto",
|
||||
)
|
||||
accelerator="auto"
|
||||
)
|
||||
|
||||
logger.info("Adding latent representation to query data")
|
||||
query.obsm[par["output_obsm_scanvi_embedding"]] = scanvi_query.get_latent_representation()
|
||||
logger.info("Adding latent representation to query data")
|
||||
input_modality.obsm[par["output_obsm_scanvi_embedding"]] = scanvi_query.get_latent_representation()
|
||||
|
||||
logger.info("Running predictions on query data")
|
||||
query.obs[par["output_obs_predictions"]] = scanvi_query.predict(query)
|
||||
query.obs[par["output_obs_probability"]] = np.max(scanvi_query.predict(query, soft=True), axis=1)
|
||||
logger.info("Running predictions on query data")
|
||||
input_modality.obs[par["output_obs_predictions"]] = scanvi_query.predict(input_modality)
|
||||
input_modality.obs[par["output_obs_probability"]] = np.max(scanvi_query.predict(input_modality, soft=True), axis=1)
|
||||
|
||||
logger.info("Saving output and model")
|
||||
input_data.mod[par["modality"]] = query
|
||||
input_data.write_h5mu(par["output"], compression=par["output_compression"])
|
||||
logger.info("Saving output and model")
|
||||
input_data.mod[par["modality"]] = input_modality
|
||||
input_data.write_h5mu(par["output"], compression=par["output_compression"])
|
||||
|
||||
if par["output_model"]:
|
||||
if par["output_model"]:
|
||||
scanvi_query.save(par["output_model"], overwrite=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
import pytest
|
||||
import re
|
||||
import mudata as mu
|
||||
import anndata as ad
|
||||
from openpipelinetestutils.asserters import assert_annotation_objects_equal
|
||||
import scvi
|
||||
import os
|
||||
@@ -16,6 +16,7 @@ meta = {
|
||||
input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"
|
||||
reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def create_scvi_model(random_path, tmp_path):
|
||||
def wrapper(input_file, reference_file):
|
||||
@@ -49,28 +50,47 @@ def create_scvi_model(random_path, tmp_path):
|
||||
scvi_model.train(max_epochs=10)
|
||||
|
||||
input_data.mod["rna"] = query
|
||||
reference_data.mod["rna"] = reference
|
||||
# reference_data.mod["rna"] = reference
|
||||
|
||||
input_data_file = random_path(extension="h5mu")
|
||||
reference_file = random_path(extension="h5mu")
|
||||
# reference_file = random_path(extension="h5mu")
|
||||
scvi_model_file = tmp_path
|
||||
|
||||
input_data.write_h5mu(input_data_file)
|
||||
reference_data.write_h5mu(reference_file)
|
||||
scvi_model.save(scvi_model_file, overwrite=True)
|
||||
# reference_data.write_h5mu(reference_file)
|
||||
scvi_model.save(scvi_model_file, save_anndata=True, overwrite=True)
|
||||
|
||||
return scvi_model_file, input_data_file, reference_file
|
||||
return scvi_model_file, input_data_file
|
||||
return wrapper
|
||||
|
||||
|
||||
@pytest.fixture
def create_scanvi_model(create_scvi_model, tmp_path):
    """Fixture returning a factory that trains and saves a small scANVI model.

    The factory first builds an scVI model through ``create_scvi_model`` and
    then derives a scANVI model from it, trains it for 10 epochs and saves it
    together with its AnnData object.

    Returns:
        A zero-argument callable that yields a tuple
        ``(scanvi_model_dir, input_data_file)``.
    """
    def scanvi_wrapper():
        scvi_model_file, input_data_file = create_scvi_model(input_file, reference_file)

        scvi_model = scvi.model.SCVI.load(scvi_model_file)
        scanvi_model = scvi.model.SCANVI.from_scvi_model(
            scvi_model,
            unlabeled_category="Unkown",
            labels_key="cell_ontology_class",
        )
        scanvi_model.train(max_epochs=10)

        # `create_scvi_model` saves the scVI model directly into `tmp_path`.
        # Saving the scANVI model into that same directory with overwrite=True
        # would replace the scVI model files, so a test that needs both models
        # would silently receive the scANVI model twice. Use a dedicated
        # sub-directory instead.
        scanvi_model_file = tmp_path / "scanvi_model"
        scanvi_model.save(scanvi_model_file, save_anndata=True, overwrite=True)

        return scanvi_model_file, input_data_file

    return scanvi_wrapper
|
||||
|
||||
|
||||
def test_simple_execution(run_component, random_h5mu_path, create_scvi_model):
|
||||
scvi_model_file, input_file_scvi, reference_file_scvi = create_scvi_model(input_file, reference_file)
|
||||
scvi_model_file, input_file_scvi = create_scvi_model(input_file, reference_file)
|
||||
output_file = random_h5mu_path()
|
||||
|
||||
run_component([
|
||||
"--input", input_file_scvi,
|
||||
"--reference", reference_file_scvi,
|
||||
"--scvi_reference_model", scvi_model_file,
|
||||
"--reference_obs_label", "cell_ontology_class",
|
||||
"--reference_max_epochs", "10",
|
||||
"--query_max_epochs", "10",
|
||||
"--output", output_file
|
||||
@@ -90,15 +110,14 @@ def test_simple_execution(run_component, random_h5mu_path, create_scvi_model):
|
||||
assert_annotation_objects_equal(input_mudata.mod["prot"],
|
||||
output_mudata.mod["prot"])
|
||||
|
||||
|
||||
def test_multiple_arguments(run_component, random_h5mu_path, create_scvi_model, tmp_path):
|
||||
scvi_model_file, input_file_scvi, reference_file_scvi = create_scvi_model(input_file, reference_file)
|
||||
scvi_model_file, input_file_scvi = create_scvi_model(input_file, reference_file)
|
||||
output_file = random_h5mu_path()
|
||||
|
||||
run_component([
|
||||
"--input", input_file_scvi,
|
||||
"--reference", reference_file_scvi,
|
||||
"--scvi_reference_model", scvi_model_file,
|
||||
"--reference_obs_label", "cell_ontology_class",
|
||||
"--output", output_file,
|
||||
"--reference_max_epochs", "10",
|
||||
"--reference_reduce_lr_on_plateau", "True",
|
||||
@@ -134,9 +153,58 @@ def test_multiple_arguments(run_component, random_h5mu_path, create_scvi_model,
|
||||
assert "scanvi_pred" in output_mudata.mod["rna"].obs.keys(), "Predictions not added"
|
||||
assert "scanvi_probability" in output_mudata.mod["rna"].obs.keys(), "Probabilities not added"
|
||||
|
||||
assert_annotation_objects_equal(input_mudata.mod["prot"],
|
||||
output_mudata.mod["prot"])
|
||||
|
||||
|
||||
def test_pretrained_scanvi(run_component, random_h5mu_path, create_scanvi_model):
|
||||
scanvi_model_file, input_file_scanvi = create_scanvi_model()
|
||||
output_file = random_h5mu_path()
|
||||
|
||||
run_component([
|
||||
"--input", input_file_scanvi,
|
||||
"--scanvi_reference_model", scanvi_model_file,
|
||||
"--reference_obs_label", "cell_ontology_class",
|
||||
"--reference_max_epochs", "10",
|
||||
"--query_max_epochs", "10",
|
||||
"--output", output_file
|
||||
])
|
||||
|
||||
assert os.path.exists(output_file), "Output file does not exist"
|
||||
|
||||
input_mudata = mu.read_h5mu(input_file_scanvi)
|
||||
output_mudata = mu.read_h5mu(output_file)
|
||||
|
||||
assert input_mudata.mod["rna"].n_obs == output_mudata.mod["rna"].n_obs, f"Number of observations changed"
|
||||
assert input_mudata.mod["rna"].n_vars == output_mudata.mod["rna"].n_vars, f"Number of variables changed"
|
||||
assert "scanvi_embedding" in output_mudata.mod["rna"].obsm.keys(), "Latent representation not added"
|
||||
assert "scanvi_pred" in output_mudata.mod["rna"].obs.keys(), "Predictions not added"
|
||||
assert "scanvi_probability" in output_mudata.mod["rna"].obs.keys(), "Probabilities not added"
|
||||
|
||||
assert_annotation_objects_equal(input_mudata.mod["prot"],
|
||||
output_mudata.mod["prot"])
|
||||
|
||||
|
||||
def test_raises(run_component, random_h5mu_path, create_scvi_model, create_scanvi_model):
|
||||
scvi_model_file, input_file_scvi = create_scvi_model(input_file, reference_file)
|
||||
scanvi_model_file, input_file_scanvi = create_scanvi_model()
|
||||
output_file = random_h5mu_path()
|
||||
|
||||
with pytest.raises(subprocess.CalledProcessError) as err:
|
||||
run_component([
|
||||
"--input", input_file_scanvi,
|
||||
"--scanvi_reference_model", scanvi_model_file,
|
||||
"--scvi_reference_model", scvi_model_file,
|
||||
"--reference_obs_label", "cell_ontology_class",
|
||||
"--reference_max_epochs", "10",
|
||||
"--query_max_epochs", "10",
|
||||
"--output", output_file
|
||||
])
|
||||
assert re.search(
|
||||
r"ValueError: Make sure to provide either an '--scvi_reference_model' or a '--scanvi_reference_model', but not both.",
|
||||
err.value.stdout.decode('utf-8')
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(pytest.main([__file__]))
|
||||
46
src/annotate/utils/query_reference_allignment.py
Normal file
46
src/annotate/utils/query_reference_allignment.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import re
|
||||
|
||||
import anndata as ad
|
||||
|
||||
def setup_logger():
    """Configure the root logger to write INFO-level records to stdout.

    The function is idempotent: a stdout handler is only attached when the
    root logger does not already have one. This module calls it at import
    time and the importing script calls its own copy again; without the guard
    every record would be emitted once per call.

    Returns:
        logging.Logger: the configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only attach a console handler if no handler already targets stdout.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
|
||||
# END TEMPORARY WORKAROUND setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
# Helper functions
|
||||
def set_var_index(adata: ad.AnnData, var_name: str | None = None):
    """Replace ``adata.var.index`` with names stripped of a trailing ``.<digits>`` suffix.

    Args:
        adata: Annotated data object whose ``.var`` index is rewritten in place.
        var_name: Optional ``.var`` column to take the names from. When falsy,
            the current ``.var`` index is used as the source.

    Returns:
        The same ``adata`` object that was passed in, with its ``.var`` index updated.
    """
    # presumably the suffix is a gene-id version number (e.g. "ID.12") — TODO confirm
    source_names = adata.var[var_name] if var_name else adata.var.index
    adata.var.index = [re.sub("\\.[0-9]+$", "", name) for name in source_names]
    return adata
|
||||
|
||||
|
||||
def cross_check_genes(query: ad.AnnData, reference: ad.AnnData):
    """Return the var names shared by the query and the reference dataset.

    The result is deterministic: names are returned in the order in which they
    first appear in ``query.var.index`` and each name occurs only once. The
    previous ``list(set(...))`` form returned them in hash order, which can
    differ between interpreter runs and made the column order of any subset
    built from the result non-reproducible.

    Args:
        query: Query dataset; its ``.var`` index is compared to the reference.
        reference: Reference dataset.

    Returns:
        list: var names present in both ``.var`` indices.

    Raises:
        AssertionError: if fewer than 100 names are shared.
    """
    logger.info("Detecting common vars based on gene ids")
    reference_ids = set(reference.var.index)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    common_ens_ids = [
        gene_id
        for gene_id in dict.fromkeys(query.var.index)
        if gene_id in reference_ids
    ]

    logger.info(" reference n_vars: %i", reference.n_vars)
    logger.info(" input n_vars: %i", query.n_vars)
    logger.info(" intersect n_vars: %i", len(common_ens_ids))
    assert len(common_ens_ids) >= 100, "The intersection of genes between the query and reference dataset is too small."

    return common_ens_ids
|
||||
|
||||
|
||||
def subset_vars(adata: ad.AnnData, var_column: str | None = None):
    """Subset the variables of ``adata`` using a ``.var`` column as selector.

    Args:
        adata: Annotated data object to subset.
        var_column: Name of the ``.var`` column used to index the var axis.
            When falsy, no subsetting is done.

    Returns:
        ``adata`` itself when ``var_column`` is falsy, otherwise the result of
        ``adata[:, adata.var[var_column]]``.
    """
    # assumes the column holds a boolean mask — TODO confirm against callers
    if not var_column:
        return adata
    return adata[:, adata.var[var_column]]
|
||||
75
src/filter/subset_obsp/config.vsh.yaml
Normal file
75
src/filter/subset_obsp/config.vsh.yaml
Normal file
@@ -0,0 +1,75 @@
|
||||
name: subset_obsp
|
||||
namespace: "filter"
|
||||
description: |
|
||||
Create a subset of an .obsp field in a mudata file, by filtering the columns based on the values of an .obs column. The resulting subset is stored in an .obsm slot.
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ author, maintainer ]
|
||||
argument_groups:
|
||||
- name: Input
|
||||
arguments:
|
||||
- name: "--input"
|
||||
type: file
|
||||
description: Input h5mu file
|
||||
direction: input
|
||||
required: true
|
||||
example: input.h5mu
|
||||
- name: "--modality"
|
||||
type: string
|
||||
default: "rna"
|
||||
required: false
|
||||
- name: "--input_obsp_key"
|
||||
type: string
|
||||
required: true
|
||||
description: The .obsp field to be filtered.
|
||||
- name: "--input_obs_key"
|
||||
type: string
|
||||
required: true
|
||||
description: The .obs column to filter on.
|
||||
- name: "--input_obs_value"
|
||||
type: string
|
||||
required: true
|
||||
description: The value to filter on in the .obs column.
|
||||
- name: Output
|
||||
arguments:
|
||||
- name: "--output"
|
||||
type: file
|
||||
description: Output h5mu file.
|
||||
direction: output
|
||||
example: output.h5mu
|
||||
- name: "--output_obsm_key"
|
||||
type: string
|
||||
required: true
|
||||
description: The .obsm key to store the subset in.
|
||||
- name: "--output_compression"
|
||||
type: string
|
||||
description: The compression format to be used on the output h5mu object.
|
||||
choices: ["gzip", "lzf"]
|
||||
required: false
|
||||
example: "gzip"
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
- path: /src/utils/setup_logger.py
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: /resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: python:3.12-slim
|
||||
setup:
|
||||
- type: apt
|
||||
packages:
|
||||
- procps
|
||||
- type: python
|
||||
__merge__: /src/base/requirements/anndata_mudata.yaml
|
||||
__merge__: [ /src/base/requirements/python_test_setup.yaml, .]
|
||||
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [singlecpu, lowmem]
|
||||
54
src/filter/subset_obsp/script.py
Normal file
54
src/filter/subset_obsp/script.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import mudata as mu
|
||||
|
||||
### VIASH START
|
||||
par = {
|
||||
'input': 'resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu',
|
||||
'modality': 'rna',
|
||||
'input_obsp_key': 'distances',
|
||||
'input_obs_key': 'leiden',
|
||||
'input_obs_value': '1',
|
||||
'output_obsm_key': "leiden_1",
|
||||
'output': 'subset_obsp_output.h5mu',
|
||||
'output_compression': None,
|
||||
}
|
||||
### VIASH END
|
||||
|
||||
# START TEMPORARY WORKAROUND setup_logger
|
||||
# reason: resources aren't available when using Nextflow fusion
|
||||
# from setup_logger import setup_logger
|
||||
def setup_logger():
    """Configure the root logger to write INFO-level records to stdout.

    The function is idempotent: a stdout handler is only attached when the
    root logger does not already have one, so repeated calls do not cause
    records to be printed more than once.

    Returns:
        logging.Logger: the configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only attach a console handler if no handler already targets stdout.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
|
||||
# END TEMPORARY WORKAROUND setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
def main():
    """Subset the columns of an ``.obsp`` matrix and store the result in ``.obsm``.

    Reads the MuData file at ``par["input"]``, selects the columns of
    ``.obsp[par["input_obsp_key"]]`` for which the ``.obs`` column
    ``par["input_obs_key"]`` (compared as strings) equals
    ``par["input_obs_value"]``, stores the result under
    ``.obsm[par["output_obsm_key"]]`` and writes the MuData object to
    ``par["output"]``. The original ``.obsp`` entry is left untouched.

    Raises:
        ValueError: if the requested ``.obsp`` key or ``.obs`` column is missing.
    """
    logger.info(f"Reading {par['input']}")
    mdata = mu.read_h5mu(par["input"])
    adata = mdata.mod[par["modality"]]

    # Fail early with an actionable message instead of a bare KeyError.
    if par["input_obsp_key"] not in adata.obsp:
        raise ValueError(
            f"Make sure --input_obsp_key {par['input_obsp_key']} is a valid .obsp key. "
            f"Found: {list(adata.obsp.keys())}."
        )
    if par["input_obs_key"] not in adata.obs.columns:
        raise ValueError(
            f"Make sure --input_obs_key {par['input_obs_key']} is a valid .obs column. "
            f"Found: {list(adata.obs.columns)}."
        )

    logger.info(f"Subset columns of obsp matrix under {par['input_obsp_key']} based on {par['input_obs_key']} == {par['input_obs_value']}")
    # .obsp, .obs and .obsm index and .obsp columns all have a dimension length of `n_obs`
    # the index dimensions remain unaltered, but .obsp columns will be subset
    obsp = adata.obsp[par["input_obsp_key"]]
    # Convert to a plain NumPy boolean mask so the column indexing does not
    # depend on how the matrix type interprets a pandas Series.
    idx = (adata.obs[par["input_obs_key"]].astype(str) == par["input_obs_value"]).to_numpy()
    if not idx.any():
        logger.warning(
            f"No observations found where {par['input_obs_key']} == {par['input_obs_value']}; "
            "the resulting subset has no columns."
        )
    obsm_subset = obsp[:, idx]

    logger.info(f"Writing subset obsp matrix to .obsm {par['output_obsm_key']}")
    adata.obsm[par["output_obsm_key"]] = obsm_subset

    logger.info(f"Writing output to {par['output']}")
    mdata.write_h5mu(par["output"], compression=par["output_compression"])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
48
src/filter/subset_obsp/test.py
Normal file
48
src/filter/subset_obsp/test.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import sys
|
||||
import pytest
|
||||
import mudata as mu
|
||||
|
||||
## VIASH START
|
||||
meta = {
|
||||
'resources_dir': 'resources_test/pbmc_1k_protein_v3/'
|
||||
}
|
||||
## VIASH END
|
||||
|
||||
|
||||
@pytest.fixture
def input_h5mu():
    """Load the test MuData object and add a two-group ``filter_column`` to the rna ``.obs``.

    The first 50 observations are labelled ``"group_1"``, all others ``"group_2"``.
    """
    mdata = mu.read_h5mu(f"{meta['resources_dir']}/pbmc_1k_protein_v3_mms.h5mu")
    obs = mdata.mod["rna"].obs
    obs["filter_column"] = "group_2"
    # Assign through a single positional indexer. The chained form
    # `obs["filter_column"][:50] = ...` writes to a temporary and is a no-op
    # when pandas Copy-on-Write is active.
    obs.iloc[:50, obs.columns.get_loc("filter_column")] = "group_1"
    return mdata
|
||||
|
||||
|
||||
@pytest.fixture
def input_path(write_mudata_to_file, input_h5mu):
    """Write the prepared MuData fixture to disk and return what the writer returns."""
    written_path = write_mudata_to_file(input_h5mu)
    return written_path
|
||||
|
||||
|
||||
def test_subset_obsp(input_path, run_component, tmp_path):
    """Run the component on the 50/rest grouped fixture and check the new ``.obsm`` entry."""
    output_path = tmp_path / "output.h5mu"

    # Build the command line for the component, then run it.
    cli_args = [
        "--input", input_path,
        "--output", str(output_path),
        "--input_obsp_key", "distances",
        "--input_obs_key", "filter_column",
        "--input_obs_value", "group_1",
        "--output_obsm_key", "group_1"
    ]
    run_component(cli_args)

    assert output_path.is_file(), "Output file not found"

    # Inspect the rna modality of the written file.
    rna_obsm = mu.read_h5mu(output_path).mod["rna"].obsm

    assert "group_1" in rna_obsm, "Output should contain group_1 in .obsm"
    assert rna_obsm["group_1"].shape[1] == 50, "Obsm should only contain a subset of the original obsp matrix"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__]))
|
||||
@@ -1,7 +1,7 @@
|
||||
name: pynndescent_knn
|
||||
name: knn
|
||||
namespace: "labels_transfer"
|
||||
description: |
|
||||
This component generates a neighborhood graph based using the PyNNDescentTransformer, followed by classification using a k-nearest neighborhood vote.
|
||||
This component performs label transfer from reference to query using a K-Nearest Neighbors classifier.
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ maintainer, author ]
|
||||
@@ -11,6 +11,27 @@ authors:
|
||||
__merge__: ../api/common_arguments.yaml
|
||||
|
||||
argument_groups:
|
||||
- name: Input dataset (query) arguments
|
||||
arguments:
|
||||
- name: "--input_obsm_distances"
|
||||
type: string
|
||||
direction: input
|
||||
required: false
|
||||
example: bbknn_distances
|
||||
description: |
|
||||
The `.obsm` key of the input (query) dataset containing pre-calculated distances.
|
||||
If not provided, the distances will be calculated using PyNNDescent.
|
||||
Make sure the distance matrix contains distances relative to the reference dataset and were obtained in the same way as the reference embedding.
|
||||
|
||||
- name: Reference dataset arguments
|
||||
arguments:
|
||||
- name: "--reference_obsm_distances"
|
||||
type: string
|
||||
required: false
|
||||
description: |
|
||||
The `.obsm` key of the reference dataset containing pre-calculated distances.
|
||||
If not provided, the distances will be calculated using PyNNDescent.
|
||||
example: bbknn_distances
|
||||
|
||||
- name: KNN label transfer arguments
|
||||
arguments:
|
||||
@@ -30,6 +51,7 @@ argument_groups:
|
||||
description: |
|
||||
The number of neighbors to use in k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent.
|
||||
Larger values will result in more accurate search results at the cost of computation time.
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
@@ -56,9 +78,7 @@ engines:
|
||||
packages:
|
||||
- pynndescent~=0.5.10
|
||||
- numpy<2
|
||||
test_setup:
|
||||
- type: python
|
||||
__merge__: [ /src/base/requirements/viashpy.yaml ]
|
||||
__merge__: [ /src/base/requirements/python_test_setup.yaml, .]
|
||||
|
||||
runners:
|
||||
- type: executable
|
||||
@@ -77,20 +77,46 @@ r_adata = r_mdata.mod[par["modality"]]
|
||||
logger.info("Checking arguments")
|
||||
par = check_arguments(par)
|
||||
|
||||
# Generating training and inference data
|
||||
logger.info("Generating training and inference data")
|
||||
train_X = get_reference_features(r_adata, par, logger)
|
||||
inference_X = get_query_features(q_adata, par, logger)
|
||||
if par["input_obsm_distances"] and par["reference_obsm_distances"]:
|
||||
logger.info("Using pre-calculated distances for KNN classification as provided in `--input_obsm_distances` and `--reference_obsm_distances`.")
|
||||
|
||||
neighbors_transformer = PyNNDescentTransformer(
|
||||
assert par["input_obsm_distances"] in q_adata.obsm, f"Make sure --input_obsm_distances {par['input_obsm_distances']} is a valid .obsm key. Found: {q_adata.obsm.keys()}."
|
||||
assert par["reference_obsm_distances"] in r_adata.obsm, f"Make sure --reference_obsm_distances {par['reference_obsm_distances']} is a valid .obsm key. Found: {r_adata.obsm.keys()}."
|
||||
|
||||
query_neighbors = q_adata.obsm[par["input_obsm_distances"]]
|
||||
reference_neighbors = r_adata.obsm[par["reference_obsm_distances"]]
|
||||
|
||||
if query_neighbors.shape[1] != reference_neighbors.shape[1]:
|
||||
raise ValueError("The number of neighbors in the query and reference distance matrices do not match. Make sure both distance matrices contain distances to the reference dataset.")
|
||||
|
||||
# Make sure the number of neighbors present in the distance matrix matches the requested number of neighbors in --n_neighbors
|
||||
# Otherwise reduce n_neighbors for KNN
|
||||
smallest_neighbor_count = min(
|
||||
np.diff(query_neighbors.indptr).min(),
|
||||
np.diff(reference_neighbors.indptr).min()
|
||||
)
|
||||
if smallest_neighbor_count < par["n_neighbors"]:
|
||||
logger.warning(f"The number of neighbors in the distance matrices is smaller than the requested number of neighbors in --n_neighbors. Reducing n_neighbors to {smallest_neighbor_count} for KNN Classification")
|
||||
par["n_neighbors"] = smallest_neighbor_count
|
||||
|
||||
elif par["input_obsm_distances"] or par["reference_obsm_distances"]:
|
||||
raise ValueError("Make sure to provide both --input_obsm_distances and --reference_obsm_distances if you want to use a pre-calculated distance matrix for KNN classification.")
|
||||
|
||||
elif not par["input_obsm_distances"] and not par["reference_obsm_distances"]:
|
||||
logger.info("No pre-calculated distances were provided. Calculating distances using the PyNNDescent algorithm.")
|
||||
# Generating training and inference data
|
||||
train_X = get_reference_features(r_adata, par, logger)
|
||||
inference_X = get_query_features(q_adata, par, logger)
|
||||
|
||||
neighbors_transformer = PyNNDescentTransformer(
|
||||
n_neighbors=par["n_neighbors"],
|
||||
parallel_batch_queries=True,
|
||||
)
|
||||
neighbors_transformer.fit(train_X)
|
||||
)
|
||||
neighbors_transformer.fit(train_X)
|
||||
|
||||
# Square sparse matrix with distances to n neighbors in reference data
|
||||
reference_neighbors = neighbors_transformer.transform(inference_X)
|
||||
query_neighbors = neighbors_transformer.transform(train_X)
|
||||
# Square sparse matrix with distances to n neighbors in reference data
|
||||
query_neighbors = neighbors_transformer.transform(inference_X)
|
||||
reference_neighbors = neighbors_transformer.transform(train_X)
|
||||
|
||||
# For each target, train a classifier and predict labels
|
||||
for obs_tar, obs_pred, obs_proba in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_probability"]):
|
||||
@@ -104,10 +130,14 @@ for obs_tar, obs_pred, obs_proba in zip(par["reference_obs_targets"], par["outp
|
||||
|
||||
logger.info(f"Using KNN classifier with {par['weights']} weights")
|
||||
train_y = r_adata.obs[obs_tar].to_numpy()
|
||||
classifier = KNeighborsClassifier(n_neighbors=par["n_neighbors"], metric="precomputed", weights=weights_dict[par["weights"]])
|
||||
classifier.fit(X=query_neighbors, y=train_y)
|
||||
predicted_labels = classifier.predict(reference_neighbors)
|
||||
probabilities = classifier.predict_proba(reference_neighbors).max(axis=1)
|
||||
classifier = KNeighborsClassifier(
|
||||
n_neighbors=par["n_neighbors"],
|
||||
metric="precomputed",
|
||||
weights=weights_dict[par["weights"]]
|
||||
)
|
||||
classifier.fit(X=reference_neighbors, y=train_y)
|
||||
predicted_labels = classifier.predict(query_neighbors)
|
||||
probabilities = classifier.predict_proba(query_neighbors).max(axis=1)
|
||||
|
||||
# save_results
|
||||
logger.info(f"Saving predictions to {obs_pred} and probabilities to {obs_proba} in obs")
|
||||
155
src/labels_transfer/knn/test.py
Normal file
155
src/labels_transfer/knn/test.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import re
|
||||
import subprocess
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import anndata as ad
|
||||
import mudata as mu
|
||||
import numpy as np
|
||||
from scipy.sparse import csr_matrix
|
||||
|
||||
## VIASH START
meta = {
    'resources_dir': './resources_test/'
}
## VIASH END

# Query dataset, already available in .h5mu format.
input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"

# Reference dataset: read from the .h5ad file and written back as .h5mu
# next to it, so the tests can pass the .h5mu path to --reference.
reference_h5ad_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"

# Wrap the reference AnnData in a MuData object with a single "rna" modality.
reference_adata = ad.read_h5ad(reference_h5ad_file)
reference_mdata = mu.MuData({"rna": reference_adata})
reference_mdata.write_h5mu(reference_file)
|
||||
|
||||
|
||||
def test_label_transfer(run_component, random_h5mu_path):
    """Run the component without specifying output column names.

    Checks that the output file is written and that the "rna" modality's
    .obs contains `cell_type_pred` and `cell_type_probability` for the
    `cell_type` reference target.

    Args:
        run_component: fixture that executes the component with CLI arguments.
        random_h5mu_path: fixture returning a callable that yields a fresh
            .h5mu path.
    """
    output = random_h5mu_path()

    run_component([
        "--input", input_file,
        "--modality", "rna",
        "--reference", reference_file,
        "--reference_obs_targets", "cell_type",
        "--output", output,
        "--n_neighbors", "5"
    ])

    assert Path(output).is_file()

    output_data = mu.read_h5mu(output)
    obs = output_data.mod["rna"].obs

    assert "cell_type_pred" in obs, f"Predictions cell_type_pred is missing from output\noutput: {obs}"
    # The column holds probabilities, so the message names them as such.
    assert "cell_type_probability" in obs, f"Probabilities cell_type_probability is missing from output\noutput: {obs}"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("weights", ["uniform", "distance", "gaussian"])
def test_label_transfer_prediction_columns(run_component, weights, random_h5mu_path):
    """Run the component with custom output column names for each weighting scheme.

    Checks that the output file is written and that the "rna" modality's
    .obs contains the requested `test_prediction` and `test_probability`
    columns.

    Args:
        run_component: fixture that executes the component with CLI arguments.
        weights: value passed to `--weights` (parametrized).
        random_h5mu_path: fixture returning a callable that yields a fresh
            .h5mu path.
    """
    output = random_h5mu_path()

    run_component([
        "--input", input_file,
        "--modality", "rna",
        "--reference", reference_file,
        "--reference_obs_targets", "cell_type",
        "--weights", weights,
        "--output", output,
        "--output_obs_probability", "test_probability",
        "--output_obs_predictions", "test_prediction",
        "--n_neighbors", "5"
    ])

    assert Path(output).is_file()

    output_data = mu.read_h5mu(output)
    obs = output_data.mod["rna"].obs

    assert "test_prediction" in obs, f"Predictions test_prediction is missing from output\noutput: {obs}"
    # The column holds probabilities, so the message names them as such.
    assert "test_probability" in obs, f"Probabilities test_probability is missing from output\noutput: {obs}"
|
||||
|
||||
|
||||
def test_label_transfer_prediction_precomputed_neighbor_graph(run_component, random_h5mu_path):
    """Run the component with pre-calculated distance matrices in `.obsm`.

    Mock sparse distance matrices are stored under the `distances` key of
    both the query and the reference, and passed to the component through
    `--input_obsm_distances` / `--reference_obsm_distances`.

    The mock data is written to temporary copies rather than back to the
    shared `input_file` / `reference_file`, so the other tests in this module
    keep seeing unmodified inputs. Matrix sizes are derived from the data
    instead of being hard-coded.

    Args:
        run_component: fixture that executes the component with CLI arguments.
        random_h5mu_path: fixture returning a callable that yields a fresh
            .h5mu path. NOTE(review): assumed to return a distinct path on
            every call; confirm against the fixture definition.
    """
    output = random_h5mu_path()
    reference_with_distances = random_h5mu_path()
    query_with_distances = random_h5mu_path()

    # Add mock distance matrix to obsm slot of the reference
    reference_mdata = mu.read_h5mu(reference_file)
    n_reference = reference_mdata.mod["rna"].n_obs
    ref_distances = np.random.rand(n_reference, n_reference)
    # Zero out roughly half of the entries so the CSR matrix is sparse.
    ref_distances[ref_distances < 0.5] = 0
    reference_mdata.mod["rna"].obsm["distances"] = csr_matrix(ref_distances)
    reference_mdata.write_h5mu(reference_with_distances)

    # Add mock distance matrix (query cells x reference cells) to the query
    query_mdata = mu.read_h5mu(input_file)
    n_query = query_mdata.mod["rna"].n_obs
    query_distances = np.random.rand(n_query, n_reference)
    query_distances[query_distances < 0.5] = 0
    query_mdata.mod["rna"].obsm["distances"] = csr_matrix(query_distances)
    query_mdata.write_h5mu(query_with_distances)

    run_component([
        "--input", str(query_with_distances),
        "--modality", "rna",
        "--reference", str(reference_with_distances),
        "--reference_obs_targets", "cell_type",
        "--output", output,
        "--input_obsm_distances", "distances",
        "--reference_obsm_distances", "distances",
        "--output_obs_probability", "test_probability",
        "--output_obs_predictions", "test_prediction",
        "--n_neighbors", "5"
    ])

    assert Path(output).is_file()

    output_data = mu.read_h5mu(output)
    obs = output_data.mod["rna"].obs

    assert "test_prediction" in obs, f"Predictions test_prediction is missing from output\noutput: {obs}"
    assert "test_probability" in obs, f"Probabilities test_probability is missing from output\noutput: {obs}"
|
||||
|
||||
|
||||
def test_raises_distance_matrix_dimensions(run_component, random_h5mu_path):
    """Check that mismatching distance matrix widths make the component fail.

    The reference distance matrix is deliberately given fewer columns than
    the query distance matrix. The component is expected to exit with an
    error whose output contains the "number of neighbors ... do not match"
    ValueError message.

    The mock data is written to temporary copies rather than back to the
    shared `input_file` / `reference_file`, so the other tests in this module
    keep seeing unmodified inputs.

    Args:
        run_component: fixture that executes the component with CLI arguments.
        random_h5mu_path: fixture returning a callable that yields a fresh
            .h5mu path. NOTE(review): assumed to return a distinct path on
            every call; confirm against the fixture definition.
    """
    output = random_h5mu_path()
    reference_with_distances = random_h5mu_path()
    query_with_distances = random_h5mu_path()

    reference_mdata = mu.read_h5mu(reference_file)
    n_reference = reference_mdata.mod["rna"].n_obs
    # Deliberately narrower than the query matrix below; this was hard-coded
    # as 400 x 100, which n_reference // 4 reproduces for 400 reference cells.
    ref_distances = np.random.rand(n_reference, n_reference // 4)
    ref_distances[ref_distances < 0.5] = 0
    reference_mdata.mod["rna"].obsm["distances"] = csr_matrix(ref_distances)
    reference_mdata.write_h5mu(reference_with_distances)

    query_mdata = mu.read_h5mu(input_file)
    n_query = query_mdata.mod["rna"].n_obs
    query_distances = np.random.rand(n_query, n_reference)
    query_distances[query_distances < 0.5] = 0
    query_mdata.mod["rna"].obsm["distances"] = csr_matrix(query_distances)
    query_mdata.write_h5mu(query_with_distances)

    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component([
            "--input", str(query_with_distances),
            "--modality", "rna",
            "--reference", str(reference_with_distances),
            "--reference_obs_targets", "cell_type",
            "--output", output,
            "--input_obsm_distances", "distances",
            "--reference_obsm_distances", "distances",
            "--output_obs_probability", "test_probability",
            "--output_obs_predictions", "test_prediction",
            "--n_neighbors", "5"
        ])

    # The error message is searched for in the captured stdout of the failed run.
    assert re.search(
        r"ValueError: The number of neighbors in the query and reference distance matrices do not match. Make sure both distance matrices contain distances to the reference dataset.",
        err.value.stdout.decode('utf-8')
    )
|
||||
|
||||
|
||||
# Allow running this test file directly as a script; the pytest return code
# becomes the process exit status. SystemExit is raised directly because the
# bare `exit()` helper is injected by the `site` module for interactive use
# and is not guaranteed to exist in every interpreter setup.
if __name__ == '__main__':
    raise SystemExit(pytest.main([__file__]))
|
||||
@@ -1,70 +0,0 @@
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import anndata as ad
|
||||
import mudata as mu
|
||||
|
||||
## VIASH START
|
||||
meta = {
|
||||
'resources_dir': './resources_test/'
|
||||
}
|
||||
## VIASH END
|
||||
|
||||
reference_h5ad_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
|
||||
# convert reference to h5mu
|
||||
reference_adata = ad.read_h5ad(reference_h5ad_file)
|
||||
reference_mdata = mu.MuData({"rna": reference_adata})
|
||||
reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
|
||||
reference_mdata.write_h5mu(reference_file)
|
||||
input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
|
||||
|
||||
|
||||
def test_label_transfer(run_component):
|
||||
|
||||
args = [
|
||||
"--input", input_file,
|
||||
"--modality", "rna",
|
||||
"--reference", reference_file,
|
||||
"--reference_obs_targets", "cell_type",
|
||||
"--output", "output.h5mu",
|
||||
"--n_neighbors", "5"
|
||||
]
|
||||
|
||||
run_component(args)
|
||||
|
||||
assert Path("output.h5mu").is_file()
|
||||
|
||||
output_data = mu.read_h5mu("output.h5mu")
|
||||
|
||||
assert "cell_type_pred" in output_data.mod["rna"].obs, f"Predictions cell_type_pred is missing from output\noutput: {output_data.mod['rna'].obs}"
|
||||
assert "cell_type_probability" in output_data.mod["rna"].obs, f"Uncertainties cell_type_probability is missing from output\noutput: {output_data.mod['rna'].obs}"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("weights", ["uniform", "distance", "gaussian"])
|
||||
def test_label_transfer_prediction_columns(run_component, weights):
|
||||
|
||||
output = f"output_{weights}.h5mu"
|
||||
|
||||
args = [
|
||||
"--input", input_file,
|
||||
"--modality", "rna",
|
||||
"--reference", reference_file,
|
||||
"--reference_obs_targets", "cell_type",
|
||||
"--weights", weights,
|
||||
"--output", output,
|
||||
"--output_obs_probability", "test_probability",
|
||||
"--output_obs_predictions", "test_prediction",
|
||||
"--n_neighbors", "5"
|
||||
]
|
||||
|
||||
run_component(args)
|
||||
|
||||
assert Path(output).is_file()
|
||||
|
||||
output_data = mu.read_h5mu(output)
|
||||
|
||||
assert "test_prediction" in output_data.mod["rna"].obs, f"Predictions test_prediction is missing from output\noutput: {output_data.mod['rna'].obs}"
|
||||
assert "test_probability" in output_data.mod["rna"].obs, f"Uncertainties test_probability is missing from output\noutput: {output_data.mod['rna'].obs}"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(pytest.main([__file__]))
|
||||
@@ -114,7 +114,7 @@ resources:
|
||||
- path: make_rhap_reference_2.2.1_nodocker.cwl
|
||||
test_resources:
|
||||
- type: bash_script
|
||||
path: run_test.sh
|
||||
path: test.sh
|
||||
- path: /resources_test/reference_gencodev41_chr1/reference.fa.gz
|
||||
- path: /resources_test/reference_gencodev41_chr1/reference.gtf.gz
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
## VIASH START
|
||||
meta_executable="bin/viash run src/reference/make_reference/config.vsh.yaml --"
|
||||
@@ -53,7 +53,7 @@ resources:
|
||||
path: script.sh
|
||||
test_resources:
|
||||
- type: bash_script
|
||||
path: run_test.sh
|
||||
path: test.sh
|
||||
- path: /resources_test/reference_gencodev41_chr1
|
||||
engines:
|
||||
- type: docker
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
set -eo pipefail
|
||||
|
||||
## VIASH START
|
||||
par_genome_fasta="resources_test/reference_gencodev41_chr1/reference.fa.gz"
|
||||
|
||||
@@ -33,7 +33,7 @@ resources:
|
||||
path: script.sh
|
||||
test_resources:
|
||||
- type: bash_script
|
||||
path: run_test.sh
|
||||
path: test.sh
|
||||
- path: /resources_test/reference_gencodev41_chr1
|
||||
|
||||
engines:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
set -eo pipefail
|
||||
|
||||
## VIASH START
|
||||
par_genome_fasta="resources_test/reference_gencodev41_chr1/reference.fa.gz"
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
# set -eo pipefail
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
## VIASH START
|
||||
meta_resources_dir="./resources_test"
|
||||
|
||||
@@ -27,8 +27,6 @@ resources:
|
||||
- type: bash_script
|
||||
path: script.sh
|
||||
test_resources:
|
||||
# - type: bash_script
|
||||
# path: run_test.sh
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: /resources_test/reference_gencodev41_chr1
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
## VIASH START
|
||||
meta_executable="bin/viash run src/reference/cellranger_mkgtf/config.vsh.yaml --"
|
||||
## VIASH END
|
||||
|
||||
# create temporary directory
|
||||
tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX")
|
||||
function clean_up {
|
||||
rm -rf "$tmpdir"
|
||||
}
|
||||
trap clean_up EXIT
|
||||
|
||||
zcat "$meta_resources_dir/reference_gencodev41_chr1/reference.gtf.gz" | awk '$4 < 50001 {print ;}' | gzip > "$tmpdir/reference_small.gtf.gz"
|
||||
|
||||
expected_gene_types=("transcribed_unprocessed_pseudogene" "miRNA")
|
||||
attribute_values=$(printf 'gene_type:%s,' "${expected_gene_types[@]}")
|
||||
attribute_values=${attribute_values%,} # remove trailing comma
|
||||
echo $attribute_values
|
||||
|
||||
echo "> Running $meta_name, writing to $tmpdir."
|
||||
$meta_executable \
|
||||
--input_gtf "$tmpdir/reference_small.gtf.gz" \
|
||||
--output_gtf "$tmpdir/myreference_filtered.gtf.gz" \
|
||||
--attribute "$attribute_values" \
|
||||
---cpus ${meta_memory_gb:-1} \
|
||||
---memory ${meta_memory_gb:-2}GB
|
||||
|
||||
exit_code=$?
|
||||
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
|
||||
|
||||
echo ">> Checking whether output can be found"
|
||||
[[ ! -f "$tmpdir/myreference_filtered.gtf.gz" ]] && echo "Output gtf file could not be found!" && exit 1
|
||||
|
||||
echo ">> Checking attribute 'gene_type' in output gtf file"
|
||||
unique_gene_types=$(zcat "$tmpdir/myreference_filtered.gtf.gz" | awk -F'\t' '$9 ~ /gene_type/ { split($9, a, ";"); for(i in a) if(a[i] ~ /gene_type/) print a[i] }' | sed 's/.*gene_type "\(.*\)".*/\1/' | sort -u)
|
||||
echo "Expected gene types: ${expected_gene_types[@]}"
|
||||
echo "Unique gene types: $unique_gene_types"
|
||||
if [[ "${#expected_gene_types[@]}" != "$(echo "$unique_gene_types" | wc -w)" ]]; then
|
||||
echo "Error: Not all expected gene types were found in the output gtf file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "> Test succeeded!"
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
set -eo pipefail
|
||||
|
||||
## VIASH START
|
||||
par_input_gtf="resources_test/reference_gencodev41_chr1/reference.gtf.gz"
|
||||
|
||||
@@ -49,7 +49,7 @@ resources:
|
||||
path: script.sh
|
||||
test_resources:
|
||||
- type: bash_script
|
||||
path: run_test.sh
|
||||
path: test.sh
|
||||
engines:
|
||||
- type: docker
|
||||
image: ubuntu:22.04
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
## VIASH START
|
||||
meta_executable="bin/viash run src/reference/make_reference/config.vsh.yaml --"
|
||||
## VIASH END
|
||||
|
||||
echo "> Running $meta_name."
|
||||
fasta="myreference.fa.gz"
|
||||
gtf="myreference.gtf.gz"
|
||||
|
||||
wget https://ftp.ensembl.org/pub/release-109/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz
|
||||
wget https://ftp.ensembl.org/pub/release-109/gtf/homo_sapiens/Homo_sapiens.GRCh38.109.chr.gtf.gz
|
||||
wget https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip
|
||||
|
||||
$meta_executable \
|
||||
--genome_fasta "Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz" \
|
||||
--transcriptome_gtf "Homo_sapiens.GRCh38.109.chr.gtf.gz" \
|
||||
--ercc "ERCC92.zip" \
|
||||
--subset_regex "(ERCC-00002|1)" \
|
||||
--output_fasta $fasta \
|
||||
--output_gtf $gtf
|
||||
|
||||
exit_code=$?
|
||||
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1
|
||||
|
||||
echo ">> Checking whether output can be found"
|
||||
[[ ! -f $fasta ]] && echo "Output fasta file could not be found!" && exit 1
|
||||
[[ ! -f $gtf ]] && echo "Output gtf file could not be found!" && exit 1
|
||||
|
||||
echo ">> Checking contents of fasta"
|
||||
if ! zgrep -q '>1' $fasta; then
|
||||
echo "Could not find chromosome '1' in output reference!"
|
||||
exit 1
|
||||
fi
|
||||
if ! zgrep -q '>ERCC-00002' $fasta; then
|
||||
echo "Could not find ERCC-00002 in output reference!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "> Test succeeded!"
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eou pipefail
|
||||
set -eo pipefail
|
||||
|
||||
## VIASH START
|
||||
par_genome_fasta="https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz"
|
||||
|
||||
80
src/reference/make_reference/test.sh
Normal file
80
src/reference/make_reference/test.sh
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/bin/bash

set -eo pipefail

## VIASH START
meta_executable="bin/viash run src/reference/make_reference/config.vsh.yaml --"
## VIASH END

# Fetch test data
echo ">> Fetching test data"

wget https://ftp.ensembl.org/pub/release-109/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz
wget https://ftp.ensembl.org/pub/release-109/gtf/homo_sapiens/Homo_sapiens.GRCh38.109.chr.gtf.gz
wget https://assets.thermofisher.com/TFS-Assets/LSG/manuals/ERCC92.zip

# Test 1: build a reference with ERCC spike-ins, subset to chromosome 1 and ERCC-00002.
echo ">> Test1"
mkdir test1
pushd test1
fasta="myreference.fa.gz"
gtf="myreference.gtf.gz"

# With `set -e` a failing command aborts the script immediately, so the exit
# code has to be captured in the `||` branch for the diagnostic to be printed.
"$meta_executable" \
  --genome_fasta "../Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz" \
  --transcriptome_gtf "../Homo_sapiens.GRCh38.109.chr.gtf.gz" \
  --ercc "../ERCC92.zip" \
  --subset_regex "(ERCC-00002|1)" \
  --output_fasta "$fasta" \
  --output_gtf "$gtf" \
  || { exit_code=$?; echo "Non zero exit code: $exit_code"; exit 1; }

echo ">> Checking whether output can be found"
if [[ ! -f "$fasta" ]]; then
  echo "Output fasta file could not be found!"
  exit 1
fi
if [[ ! -f "$gtf" ]]; then
  echo "Output gtf file could not be found!"
  exit 1
fi

echo ">> Checking contents of fasta"
if ! zgrep -q '>1' "$fasta"; then
  echo "Could not find chromosome '1' in output reference!"
  exit 1
fi
if ! zgrep -q '>ERCC-00002' "$fasta"; then
  echo "Could not find ERCC-00002 in output reference!"
  exit 1
fi
popd

# Test 2: build a reference without ERCC spike-ins; ERCC-00002 must be absent.
echo ">> Test 2"
mkdir test2
pushd test2
fasta="myreference.fa.gz"
gtf="myreference.gtf.gz"

"$meta_executable" \
  --genome_fasta "../Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz" \
  --transcriptome_gtf "../Homo_sapiens.GRCh38.109.chr.gtf.gz" \
  --output_fasta "$fasta" \
  --output_gtf "$gtf" \
  || { exit_code=$?; echo "Non zero exit code: $exit_code"; exit 1; }

echo ">> Checking whether output can be found"
if [[ ! -f "$fasta" ]]; then
  echo "Output fasta file could not be found!"
  exit 1
fi
if [[ ! -f "$gtf" ]]; then
  echo "Output gtf file could not be found!"
  exit 1
fi

echo ">> Checking contents of fasta"
if ! zgrep -q '>1' "$fasta"; then
  echo "Could not find chromosome '1' in output reference!"
  exit 1
fi
if zgrep -q '>ERCC-00002' "$fasta"; then
  echo "Should not find ERCC-00002 in output reference!"
  exit 1
fi
popd

echo "> Test succeeded!"
|
||||
@@ -1,22 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q gdo_singlesample
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/gdo/gdo_singlesample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-with-trace work/trace.txt \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
|
||||
@@ -43,8 +43,7 @@ workflow run_wf {
|
||||
]
|
||||
return newState
|
||||
},
|
||||
toState: ["output": "output"],
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
emit:
|
||||
|
||||
@@ -5,13 +5,13 @@ include { gdo_singlesample } from params.rootDir + "/target/nextflow/workflows/g
|
||||
params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("10x_5k_lung_crispr/SC3_v3_NextGem_DI_CRISPR_A549_5K.h5mu"),
|
||||
input: resources_test.resolve("10x_5k_lung_crispr/SC3_v3_NextGem_DI_CRISPR_A549_5K.h5mu"),
|
||||
min_counts: 3,
|
||||
max_counts: 10000000,
|
||||
min_guides_per_cell: 2,
|
||||
|
||||
@@ -1,22 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/bd_rhapsody/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
|
||||
@@ -6,16 +6,16 @@ include { bd_rhapsody_test } from params.rootDir + "/target/nextflow/test_workfl
|
||||
params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList(
|
||||
[
|
||||
[
|
||||
id: "foo",
|
||||
reads: file("${params.resources_test}/bdrhap_5kjrt/raw/12*.fastq.gz"),
|
||||
reference_archive: file(params.resources_test).resolve("reference_gencodev41_chr1/reference_bd_rhapsody.tar.gz"),
|
||||
abseq_reference: file(params.resources_test).resolve("bdrhap_5kjrt/raw/BDAbSeq_ImmuneDiscoveryPanel.fasta"),
|
||||
reference_archive: resources_test.resolve("reference_gencodev41_chr1/reference_bd_rhapsody.tar.gz"),
|
||||
abseq_reference: resources_test.resolve("bdrhap_5kjrt/raw/BDAbSeq_ImmuneDiscoveryPanel.fasta"),
|
||||
cell_calling_data: "mRNA",
|
||||
exact_cell_count: 4900
|
||||
]
|
||||
|
||||
@@ -1,23 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
viash ns build -q ingestion/cellranger_mapping --setup cb --platform nextflow
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/cellranger_mapping/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -18,8 +18,7 @@ workflow run_wf {
|
||||
toState: [
|
||||
"input": "output",
|
||||
"output_raw": "output"
|
||||
],
|
||||
auto: [ publish: true ]
|
||||
]
|
||||
)
|
||||
// split output dir into map
|
||||
| cellranger_count_split.run(
|
||||
@@ -49,14 +48,9 @@ workflow run_wf {
|
||||
"input_metrics_summary": state.metrics_summary
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[
|
||||
"output_raw": state.output_raw,
|
||||
"output_h5mu": output.output
|
||||
]
|
||||
},
|
||||
auto: [ publish: true ],
|
||||
toState: ["output_h5mu": "output"]
|
||||
)
|
||||
| setState(["output_raw", "output_h5mu"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -7,11 +7,13 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "foo",
|
||||
input: file(params.resources_test).resolve("cellranger_tiny_fastq/cellranger_tiny_fastq"),
|
||||
reference: file(params.resources_test).resolve("cellranger_tiny_fastq/cellranger_tiny_ref"),
|
||||
input: resources_test.resolve("cellranger_tiny_fastq/cellranger_tiny_fastq"),
|
||||
reference: resources_test.resolve("cellranger_tiny_fastq/cellranger_tiny_ref"),
|
||||
output_type: "filtered",
|
||||
]
|
||||
])
|
||||
|
||||
@@ -1,32 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=22.10.3
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/cellranger_multi/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile no_publish,docker \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt
|
||||
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/cellranger_multi/test.nf \
|
||||
-entry test_wf2 \
|
||||
-resume \
|
||||
-profile no_publish,docker \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -7,20 +7,32 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "foo",
|
||||
input:[file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_GEX_1_subset_S1_L001_R1_001.fastq.gz"),
|
||||
file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_GEX_1_subset_S1_L001_R2_001.fastq.gz"),
|
||||
file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_AB_subset_S2_L004_R1_001.fastq.gz"),
|
||||
file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_AB_subset_S2_L004_R2_001.fastq.gz"),
|
||||
file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_VDJ_subset_S1_L001_R1_001.fastq.gz"),
|
||||
file(params.resources_test).resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_VDJ_subset_S1_L001_R2_001.fastq.gz")],
|
||||
gex_reference: file(params.resources_test).resolve("reference_gencodev41_chr1/reference_cellranger.tar.gz"),
|
||||
vdj_reference: file(params.resources_test).resolve("10x_5k_anticmv/raw/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0.tar.gz"),
|
||||
feature_reference: file(params.resources_test).resolve("10x_5k_anticmv/raw/feature_reference.csv"),
|
||||
library_id: ["5k_human_antiCMV_T_TBNK_connect_GEX_1_subset", "5k_human_antiCMV_T_TBNK_connect_AB_subset", "5k_human_antiCMV_T_TBNK_connect_VDJ_subset"],
|
||||
library_type: ["Gene Expression", "Antibody Capture", "VDJ"]
|
||||
input:[
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_GEX_1_subset_S1_L001_R1_001.fastq.gz"),
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_GEX_1_subset_S1_L001_R2_001.fastq.gz"),
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_AB_subset_S2_L004_R1_001.fastq.gz"),
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_AB_subset_S2_L004_R2_001.fastq.gz"),
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_VDJ_subset_S1_L001_R1_001.fastq.gz"),
|
||||
resources_test.resolve("10x_5k_anticmv/raw/5k_human_antiCMV_T_TBNK_connect_VDJ_subset_S1_L001_R2_001.fastq.gz")
|
||||
],
|
||||
gex_reference: resources_test.resolve("reference_gencodev41_chr1/reference_cellranger.tar.gz"),
|
||||
vdj_reference: resources_test.resolve("10x_5k_anticmv/raw/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0.tar.gz"),
|
||||
feature_reference: resources_test.resolve("10x_5k_anticmv/raw/feature_reference.csv"),
|
||||
library_id: [
|
||||
"5k_human_antiCMV_T_TBNK_connect_GEX_1_subset",
|
||||
"5k_human_antiCMV_T_TBNK_connect_AB_subset",
|
||||
"5k_human_antiCMV_T_TBNK_connect_VDJ_subset"
|
||||
],
|
||||
library_type: [
|
||||
"Gene Expression",
|
||||
"Antibody Capture",
|
||||
"VDJ"
|
||||
]
|
||||
]
|
||||
])
|
||||
| map{ state -> [state.id, state] }
|
||||
@@ -44,8 +56,9 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// Test cell multiplexing
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "foo",
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
@@ -14,9 +12,7 @@ nextflow \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
@@ -24,6 +20,4 @@ nextflow \
|
||||
-entry test_wf2 \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -4,9 +4,10 @@ workflow run_wf {
|
||||
|
||||
main:
|
||||
// perform correction if so desired
|
||||
mid1_corrected = input_ch
|
||||
| filter{ it[1].perform_correction }
|
||||
|
||||
output_ch = input_ch
|
||||
| cellbender_remove_background.run(
|
||||
runIf: {id, state -> state.perform_correction},
|
||||
fromState: { id, state ->
|
||||
[
|
||||
input: state.input,
|
||||
@@ -16,17 +17,13 @@ workflow run_wf {
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
state + [input: output.output, layer: "cellbender_corrected"]
|
||||
state + ["input": output.output, "layer": "cellbender_corrected"]
|
||||
}
|
||||
)
|
||||
mid1_uncorrected = input_ch
|
||||
| filter{ ! it[1].perform_correction }
|
||||
mid1 = mid1_corrected.mix(mid1_uncorrected)
|
||||
|
||||
// perform filtering if so desired
|
||||
mid2_filtered = mid1
|
||||
| filter{ it[1].min_genes != null || it[1].min_counts != null }
|
||||
| filter_with_counts.run(
|
||||
runIf: {id, state ->
|
||||
state.min_genes != null || state.min_counts != null
|
||||
},
|
||||
fromState: { id, state ->
|
||||
[
|
||||
input: state.input,
|
||||
@@ -39,16 +36,14 @@ workflow run_wf {
|
||||
},
|
||||
toState: [input: "output"]
|
||||
)
|
||||
mid2_unfiltered = mid1
|
||||
| filter{ it[1].min_genes == null && it[1].min_counts == null }
|
||||
mid2 = mid2_filtered.mix(mid2_unfiltered)
|
||||
|
||||
// return output map
|
||||
output_ch = mid2
|
||||
// Make sure to use the correct output file names,
|
||||
// irrespective of whether or not any of the above
|
||||
// components were run
|
||||
| publish.run(
|
||||
fromState: [ input: "input", output: "output" ],
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -8,11 +8,13 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "foo",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input_og: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input_og: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
perform_correction: true,
|
||||
min_genes: 100,
|
||||
min_counts: 1000,
|
||||
@@ -55,9 +57,8 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -5,13 +5,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=24.04.4
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/conversion/test.nf \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-with-trace work/trace.txt
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -25,11 +25,9 @@ workflow run_wf {
|
||||
}
|
||||
passed_state
|
||||
},
|
||||
toState: {id, output, state, comp ->
|
||||
["output": output.output]
|
||||
},
|
||||
auto: [publish: true],
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output": "output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -7,30 +7,32 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "10xh5_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5"),
|
||||
input_type: "10xh5",
|
||||
modality: null
|
||||
],
|
||||
[
|
||||
id: "10xmtx_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix"),
|
||||
input_type: "10xmtx",
|
||||
modality: null,
|
||||
output: "\$id.h5mu"
|
||||
],
|
||||
[
|
||||
id: "10xmtx",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix"),
|
||||
input_type: "10xmtx",
|
||||
modality: "rna",
|
||||
output: "\$key.h5mu"
|
||||
],
|
||||
[
|
||||
id: "h5ad",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5ad"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix_rna.h5ad"),
|
||||
input_type: "h5ad",
|
||||
modality: "rna",
|
||||
output: "\$key.h5mu"
|
||||
|
||||
@@ -1,23 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q 'workflows/ingestion/demux'
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/demux/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-with-trace work/trace.txt \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -7,24 +7,26 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
// or when running from s3:
|
||||
Channel.fromList([
|
||||
[
|
||||
id: "mkfastq_test",
|
||||
input: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl"),
|
||||
sample_sheet: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"),
|
||||
input: resources_test.resolve("cellranger_tiny_bcl/bcl"),
|
||||
sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"),
|
||||
demultiplexer: "mkfastq"
|
||||
],
|
||||
[
|
||||
id: "bclconvert_test",
|
||||
input: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl2/"),
|
||||
sample_sheet: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl2/sample_sheet.csv"),
|
||||
input: resources_test.resolve("cellranger_tiny_bcl/bcl2/"),
|
||||
sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl2/sample_sheet.csv"),
|
||||
demultiplexer: "bclconvert"
|
||||
],
|
||||
[
|
||||
id: "bcl2fastq_test",
|
||||
input: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl"),
|
||||
sample_sheet: file(params.resources_test).resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"),
|
||||
input: resources_test.resolve("cellranger_tiny_bcl/bcl"),
|
||||
sample_sheet: resources_test.resolve("cellranger_tiny_bcl/bcl/sample_sheet.csv"),
|
||||
demultiplexer: "bcl2fastq",
|
||||
ignore_missing: true
|
||||
]
|
||||
|
||||
@@ -1,20 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=23.04.2
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/ingestion/make_reference/test.nf \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -6,12 +6,14 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "gencode_v41_ercc",
|
||||
genome_fasta: file(params.resources_test).resolve("reference_gencodev41_chr1/reference.fa.gz"),
|
||||
transcriptome_gtf: file(params.resources_test).resolve("reference_gencodev41_chr1/reference.gtf.gz"),
|
||||
ercc: file(params.resources_test).resolve("reference_gencodev41_chr1/ERCC92.zip"),
|
||||
genome_fasta: resources_test.resolve("reference_gencodev41_chr1/reference.fa.gz"),
|
||||
transcriptome_gtf: resources_test.resolve("reference_gencodev41_chr1/reference.gtf.gz"),
|
||||
ercc: resources_test.resolve("reference_gencodev41_chr1/ERCC92.zip"),
|
||||
subset_regex: "(ERCC-00002|chr1)",
|
||||
target: ["cellranger", "bd_rhapsody", "star"]
|
||||
]
|
||||
|
||||
@@ -1,25 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/bbknn_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/bbknn_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -86,11 +86,9 @@ workflow run_wf {
|
||||
"output_compression": "gzip"
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[ output: output.output ]
|
||||
},
|
||||
auto: [publish: true]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -5,16 +5,19 @@ include { bbknn_leiden } from params.rootDir + "/target/nextflow/workflows/integ
|
||||
params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch =
|
||||
Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized"
|
||||
],
|
||||
[
|
||||
id: "no_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
leiden_resolution: []
|
||||
]
|
||||
@@ -45,7 +48,9 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch =
|
||||
Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -1,27 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/harmony_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/harmony_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -77,11 +77,9 @@ workflow run_wf {
|
||||
"output_compression": "gzip"
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[ output: output.output ]
|
||||
},
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -6,11 +6,13 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch =
|
||||
Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
obs_covariates: "sample_id",
|
||||
embedding: "X_pca",
|
||||
@@ -19,7 +21,7 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "no_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
obs_covariates: "sample_id",
|
||||
embedding: "X_pca",
|
||||
@@ -53,7 +55,8 @@ workflow test_wf {
|
||||
|
||||
|
||||
workflow test_wf2 {
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch =
|
||||
Channel.fromList([
|
||||
|
||||
@@ -6,20 +6,18 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/scanorama_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
|
||||
nextflow run . \
|
||||
-main-script src/workflows/integration/scanorama_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-resume \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/scanorama_leiden/test.nf \
|
||||
-entry test_wf2 \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -74,11 +74,9 @@ workflow run_wf {
|
||||
"output_compression": "gzip"
|
||||
]
|
||||
},
|
||||
auto: [ publish: true ],
|
||||
toState: { id, output, state ->
|
||||
[ output: output.output ]
|
||||
}
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -6,16 +6,18 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
leiden_resolution: [1.0, 0.25],
|
||||
],
|
||||
[
|
||||
id: "no_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
leiden_resolution: [],
|
||||
]
|
||||
@@ -46,8 +48,8 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -1,29 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q scgpt_leiden
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/scgpt_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/scgpt_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -152,11 +152,9 @@ workflow run_wf {
|
||||
"output": state.workflow_output
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[ output: output.output ]
|
||||
},
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -6,13 +6,15 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("scgpt/test_resources/Kim2020_Lung_subset_preprocessed.h5mu"),
|
||||
model: file(params.resources_test).resolve("scgpt/source/best_model.pt"),
|
||||
model_config: file(params.resources_test).resolve("scgpt/source/args.json"),
|
||||
model_vocab: file(params.resources_test).resolve("scgpt/source/vocab.json"),
|
||||
input: resources_test.resolve("scgpt/test_resources/Kim2020_Lung_subset_preprocessed.h5mu"),
|
||||
model: resources_test.resolve("scgpt/source/best_model.pt"),
|
||||
model_config: resources_test.resolve("scgpt/source/args.json"),
|
||||
model_vocab: resources_test.resolve("scgpt/source/vocab.json"),
|
||||
input_layer: "log_normalized",
|
||||
obs_batch_label: "sample",
|
||||
n_hvg: 400,
|
||||
@@ -21,10 +23,10 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "no_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("scgpt/test_resources/Kim2020_Lung_subset_preprocessed.h5mu"),
|
||||
model: file(params.resources_test).resolve("scgpt/source/best_model.pt"),
|
||||
model_config: file(params.resources_test).resolve("scgpt/source/args.json"),
|
||||
model_vocab: file(params.resources_test).resolve("scgpt/source/vocab.json"),
|
||||
input: resources_test.resolve("scgpt/test_resources/Kim2020_Lung_subset_preprocessed.h5mu"),
|
||||
model: resources_test.resolve("scgpt/source/best_model.pt"),
|
||||
model_config: resources_test.resolve("scgpt/source/args.json"),
|
||||
model_vocab: resources_test.resolve("scgpt/source/vocab.json"),
|
||||
obs_batch_label: "sample",
|
||||
n_hvg: 400,
|
||||
seed: 1,
|
||||
@@ -59,7 +61,8 @@ workflow test_wf {
|
||||
|
||||
|
||||
workflow test_wf2 {
|
||||
resources_test = file("${params.rootDir}/resources_test/scgpt")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/scvi_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -98,14 +98,9 @@ workflow run_wf {
|
||||
"output_compression": "gzip"
|
||||
]
|
||||
},
|
||||
auto: [ publish: true ],
|
||||
toState: { id, output, state ->
|
||||
[
|
||||
output: output.output,
|
||||
output_model: state.output_model
|
||||
]
|
||||
}
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output", "output_model"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -6,10 +6,12 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
obs_batch: "sample_id",
|
||||
max_epochs: 1,
|
||||
@@ -17,7 +19,7 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "no_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
layer: "log_normalized",
|
||||
obs_batch: "sample_id",
|
||||
output_model: "no_leiden_resolutions_test_model/",
|
||||
|
||||
@@ -1,18 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=23.04.2
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/integration/totalvi_leiden/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -139,15 +139,9 @@ workflow run_wf {
|
||||
"compression": "gzip"
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[
|
||||
output: output.output,
|
||||
reference_model_path: state.reference_model_path,
|
||||
query_model_path: state.query_model_path
|
||||
]
|
||||
},
|
||||
auto: [ publish: true ]
|
||||
toState: ["output", "output"]
|
||||
)
|
||||
| setState(["output", "reference_model_path", "query_model_path"])
|
||||
emit:
|
||||
output_ch
|
||||
}
|
||||
|
||||
@@ -6,11 +6,13 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
prot_modality: "prot",
|
||||
prot_reference_modality: "prot",
|
||||
var_input: "filter_with_hvg",
|
||||
@@ -21,8 +23,8 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "no_prot_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
prot_modality: "prot",
|
||||
prot_reference_modality: "prot",
|
||||
var_input: "filter_with_hvg",
|
||||
@@ -34,8 +36,8 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "no_rna_leiden_resolutions_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
reference: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
prot_modality: "prot",
|
||||
prot_reference_modality: "prot",
|
||||
var_input: "filter_with_hvg",
|
||||
|
||||
@@ -1,19 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/dimensionality_reduction/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -44,11 +44,9 @@ workflow run_wf {
|
||||
"output_compression": "gzip"
|
||||
]
|
||||
},
|
||||
toState: { id, output, state ->
|
||||
[ output: output.output ]
|
||||
},
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -6,18 +6,19 @@ include { dimensionality_reduction_test } from params.rootDir + "/target/nextflo
|
||||
params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
// allow changing the resources_test dir
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
input_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
layer: "",
|
||||
output: "foo.final.h5mu"
|
||||
],
|
||||
[
|
||||
id: "pca_obsm_output_test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
layer: "",
|
||||
output: "foo.final.h5mu"
|
||||
],
|
||||
|
||||
@@ -1,26 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
viash ns build -q process_batches
|
||||
|
||||
export NXF_VER=24.04.4
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_batches/test.nf \
|
||||
-entry test_wf \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-resume
|
||||
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
@@ -28,5 +20,4 @@ nextflow \
|
||||
-entry test_wf2 \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
-resume
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -215,7 +215,7 @@ workflow run_wf {
|
||||
"output": state.workflow_output,
|
||||
]
|
||||
},
|
||||
auto: [publish: true]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
|
||||
@@ -9,16 +9,18 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
input_ch = Channel.fromList([
|
||||
[
|
||||
id: "test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
publish_dir: "foo/",
|
||||
clr_axis: 0
|
||||
],
|
||||
[
|
||||
id: "test2",
|
||||
input: file(params.resources_test).resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
publish_dir: "foo/",
|
||||
clr_axis: 1
|
||||
]
|
||||
@@ -52,8 +54,8 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
input_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -8,15 +8,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=23.10.3
|
||||
|
||||
viash ns build -q '^workflows'
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -26,7 +21,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
@@ -37,7 +31,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
@@ -48,7 +41,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config \
|
||||
@@ -57,9 +49,8 @@ nextflow \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf2 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -67,7 +58,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf3 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -76,7 +66,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf4 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -85,7 +74,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf5 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -94,7 +82,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf6 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -103,7 +90,6 @@ nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/process_samples/test.nf \
|
||||
-entry test_wf7 \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -9,17 +9,19 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "mouse",
|
||||
input: file(params.resources_test).resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
publish_dir: "foo/",
|
||||
rna_min_counts: 2,
|
||||
output: "test.h5mu",
|
||||
],
|
||||
[
|
||||
id: "human",
|
||||
input: file(params.resources_test).resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
publish_dir: "foo/",
|
||||
rna_min_counts: 2,
|
||||
output: "test.h5mu",
|
||||
@@ -42,8 +44,8 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
@@ -94,8 +96,8 @@ workflow test_wf2 {
|
||||
}
|
||||
|
||||
workflow test_wf3 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
input_ch = Channel.fromList([
|
||||
[
|
||||
@@ -156,8 +158,8 @@ workflow test_wf3 {
|
||||
}
|
||||
|
||||
workflow test_wf4 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
@@ -187,8 +189,8 @@ workflow test_wf4 {
|
||||
}
|
||||
|
||||
workflow test_wf5 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
@@ -235,8 +237,8 @@ workflow test_wf5 {
|
||||
}
|
||||
|
||||
workflow test_wf6 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
@@ -298,8 +300,8 @@ workflow test_wf6 {
|
||||
// }
|
||||
|
||||
workflow test_wf7 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -4,12 +4,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/split_modalities/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -7,10 +7,12 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "mouse",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
publish_dir: "foo/",
|
||||
output: "modalities",
|
||||
output_types: "types.csv"
|
||||
|
||||
@@ -6,12 +6,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
viash ns build -q prot_multisample
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/prot/prot_multisample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-with-trace work/trace.txt \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -6,17 +6,19 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "adt_samples_axis_0",
|
||||
sample_id: "pbmc",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
clr_axis: 0
|
||||
],
|
||||
[
|
||||
id: "adt_samples_axis_1",
|
||||
sample_id: "pbmc",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"),
|
||||
clr_axis: 1
|
||||
]
|
||||
])
|
||||
|
||||
@@ -6,15 +6,10 @@ REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q prot_singlesample
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/multiomics/prot_singlesample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-resume \
|
||||
-entry test_wf \
|
||||
-with-trace work/trace.txt \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
@@ -44,7 +44,6 @@ workflow run_wf {
|
||||
return newState
|
||||
},
|
||||
toState: ["output": "output"],
|
||||
auto: [ publish: true ]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
|
||||
@@ -6,10 +6,12 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "foo",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
min_counts: 3,
|
||||
max_counts: 100000,
|
||||
min_genes_per_cell: 2,
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q '^workflows/qc/qc'
|
||||
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/qc/qc/test.nf \
|
||||
-entry test_wf \
|
||||
-resume \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -81,7 +81,7 @@ workflow run_wf {
|
||||
"compression": "gzip"
|
||||
]
|
||||
},
|
||||
auto: [ publish: true ]
|
||||
toState: ["output": "output"]
|
||||
)
|
||||
| setState(["output"])
|
||||
|
||||
|
||||
@@ -7,15 +7,17 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch =
|
||||
Channel.fromList([
|
||||
[
|
||||
id: "mouse_test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
],
|
||||
[
|
||||
id: "human_test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
|
||||
]
|
||||
])
|
||||
| map { state -> [state.id, state] }
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
viash ns build -q rna_multisample
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/rna/rna_multisample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-resume \
|
||||
-entry test_wf \
|
||||
-with-trace work/trace.txt \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -6,10 +6,12 @@ params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
input: resources_test.resolve("concat_test_data/concatenated_brain_filtered_feature_bc_matrix_subset.h5mu"),
|
||||
output: "concatenated_file.final.h5mu"
|
||||
]
|
||||
])
|
||||
|
||||
@@ -1,30 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
|
||||
|
||||
# get the root of the directory
|
||||
REPO_ROOT=$(git rev-parse --show-toplevel)
|
||||
|
||||
# ensure that the command below is run from the root of the repository
|
||||
cd "$REPO_ROOT"
|
||||
|
||||
export NXF_VER=21.10.6
|
||||
|
||||
viash ns build -q rna_singlesample
|
||||
# viash ns build -q 'filter|publish|qc|metadata' --parallel --setup cb
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-main-script src/workflows/rna/rna_singlesample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf \
|
||||
-with-trace work/trace.txt \
|
||||
-profile docker,no_publish \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
nextflow run . \
|
||||
nextflow \
|
||||
run . \
|
||||
-entry test_wf2 \
|
||||
-main-script src/workflows/rna/rna_singlesample/test.nf \
|
||||
-profile docker,no_publish \
|
||||
-entry test_wf2 \
|
||||
-with-trace work/trace.txt \
|
||||
-c src/workflows/utils/labels_ci.config \
|
||||
-c src/workflows/utils/integration_tests.config
|
||||
|
||||
@@ -133,8 +133,8 @@ workflow run_wf {
|
||||
"layer": "layer",
|
||||
],
|
||||
args: [output_compression: "gzip"],
|
||||
auto: [ publish: true ]
|
||||
)
|
||||
| setState(["output": "output"])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -5,13 +5,13 @@ include { rna_singlesample } from params.rootDir + "/target/nextflow/workflows/r
|
||||
params.resources_test = params.rootDir + "/resources_test"
|
||||
|
||||
workflow test_wf {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
id: "mitochondrial_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
min_counts: 3,
|
||||
max_counts: 10000000,
|
||||
min_genes_per_cell: 2,
|
||||
@@ -27,7 +27,7 @@ workflow test_wf {
|
||||
],
|
||||
[
|
||||
id: "simple_execution_test",
|
||||
input: file(params.resources_test).resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
|
||||
min_counts: 3,
|
||||
max_counts: 10000000,
|
||||
min_genes_per_cell: 2,
|
||||
@@ -54,8 +54,8 @@ workflow test_wf {
|
||||
}
|
||||
|
||||
workflow test_wf2 {
|
||||
// allow changing the resources_test dir
|
||||
resources_test = file("${params.rootDir}/resources_test")
|
||||
|
||||
resources_test = file(params.resources_test)
|
||||
|
||||
output_ch = Channel.fromList([
|
||||
[
|
||||
|
||||
@@ -40,3 +40,5 @@ engines:
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [midmem, midcpu]
|
||||
427
target/executable/annotate/celltypist/.config.vsh.yaml
Normal file
427
target/executable/annotate/celltypist/.config.vsh.yaml
Normal file
@@ -0,0 +1,427 @@
|
||||
name: "celltypist"
|
||||
namespace: "annotate"
|
||||
version: "fix-integration-tests"
|
||||
authors:
|
||||
- name: "Jakub Majercik"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
email: "jakub@data-intuitive.com"
|
||||
github: "jakubmajercik"
|
||||
linkedin: "jakubmajercik"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Bioinformatics Engineer"
|
||||
- name: "Weiwei Schultz"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
role: "Contributor"
|
||||
organizations:
|
||||
- name: "Janssen R&D US"
|
||||
role: "Associate Director Data Sciences"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
description: "Input dataset (query) arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "The input (query) data to be labeled. Should be a .h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality to process."
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_layer"
|
||||
description: "The layer in the input data to be used for cell type annotation\
|
||||
\ if .X is not to be used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_query_gene_names"
|
||||
description: "The name of the adata var column in the input data containing gene\
|
||||
\ names; when no gene_name_layer is provided, the var index will be used.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Reference"
|
||||
description: "Arguments related to the reference dataset."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--reference"
|
||||
description: "The reference data to train the CellTypist classifiers on. Only\
|
||||
\ required if a pre-trained --model is not provided."
|
||||
info: null
|
||||
example:
|
||||
- "reference.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_layer"
|
||||
description: "The layer in the reference data to be used for cell type annotation\
|
||||
\ if .X is not to be used. Data are expected to be processed in the same way\
|
||||
\ as the --input query dataset."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_obs_target"
|
||||
description: "The name of the adata obs column in the reference data containing\
|
||||
\ cell type annotations."
|
||||
info: null
|
||||
default:
|
||||
- "cell_ontology_class"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--check_expression"
|
||||
description: "Whether to check the expression of the reference dataset to the\
|
||||
\ format recommended by CellTypist.\nCellTypist requires data to be log-normalized\
|
||||
\ to 10000 counts per cell.\n"
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "string"
|
||||
name: "--var_reference_gene_names"
|
||||
description: "The name of the adata var column in the reference data containing\
|
||||
\ gene names; when no gene_name_layer is provided, the var index will be used.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Model arguments"
|
||||
description: "Model arguments."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--model"
|
||||
description: "Pretrained model in pkl format. If not provided, the model will\
|
||||
\ be trained on the reference data and --reference should be provided."
|
||||
info: null
|
||||
example:
|
||||
- "pretrained_model.pkl"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--feature_selection"
|
||||
description: "Whether to perform feature selection."
|
||||
info: null
|
||||
default:
|
||||
- false
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--majority_voting"
|
||||
description: "Whether to refine the predicted labels by running the majority voting\
|
||||
\ classifier after over-clustering."
|
||||
info: null
|
||||
default:
|
||||
- false
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--C"
|
||||
description: "Inverse of regularization strength in logistic regression."
|
||||
info: null
|
||||
default:
|
||||
- 1.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--max_iter"
|
||||
description: "Maximum number of iterations before reaching the minimum of the\
|
||||
\ cost function."
|
||||
info: null
|
||||
default:
|
||||
- 1000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--use_SGD"
|
||||
description: "Whether to use the stochastic gradient descent algorithm."
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "double"
|
||||
name: "--min_prop"
|
||||
description: "\"For the dominant cell type within a subcluster, the minimum proportion\
|
||||
\ of cells required to \nsupport naming of the subcluster by this cell type.\
|
||||
\ Ignored if majority_voting is set to False. \nSubcluster that fails to pass\
|
||||
\ this proportion threshold will be assigned 'Heterogeneous'.\"\n"
|
||||
info: null
|
||||
default:
|
||||
- 0.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
description: "Output arguments."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "output.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_obs_predictions"
|
||||
description: "In which `.obs` slots to store the predicted information.\n"
|
||||
info: null
|
||||
default:
|
||||
- "celltypist_pred"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_obs_probability"
|
||||
description: "In which `.obs` slots to store the probability of the predictions.\n"
|
||||
info: null
|
||||
default:
|
||||
- "celltypist_probability"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Automated cell type annotation tool for scRNA-seq datasets on the basis\
|
||||
\ of logistic regression classifiers optimised by the stochastic gradient descent\
|
||||
\ algorithm."
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "annotation_test_data"
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3"
|
||||
- type: "file"
|
||||
path: "openpipelinetestutils"
|
||||
dest: "openpipelinetestutils"
|
||||
info: null
|
||||
status: "enabled"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.10-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "fix-integration-tests"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "libhdf5-dev"
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "scanpy~=1.9.6"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "celltypist==1.6.3"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata==0.10.8"
|
||||
- "mudata~=0.2.4"
|
||||
- "pandas!=2.1.2"
|
||||
- "numpy<2.0.0"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "docker"
|
||||
copy:
|
||||
- "openpipelinetestutils /opt/openpipelinetestutils"
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "/opt/openpipelinetestutils"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/annotate/celltypist/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/annotate/celltypist"
|
||||
executable: "target/executable/annotate/celltypist/celltypist"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "da62b4ffe30b6ef36fcb7ef5944f29d45d1138ff"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-1939-gda62b4ff"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "fix-integration-tests"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
|
||||
.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'fix-integration-tests'"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
1620
target/executable/annotate/celltypist/celltypist
Executable file
1620
target/executable/annotate/celltypist/celltypist
Executable file
File diff suppressed because it is too large
Load Diff
42
target/executable/annotate/celltypist/nextflow_labels.config
Normal file
42
target/executable/annotate/celltypist/nextflow_labels.config
Normal file
@@ -0,0 +1,42 @@
|
||||
// Shared Nextflow resource presets: process-wide defaults, a retry policy for
// memory-related exit codes, and CPU/memory labels that components can request.
process {
  // Default resources for components that hardly do any processing
  memory = { 2.GB * task.attempt }
  cpus = 1

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
  // No ceiling by default; while this is null, get_memory() returns its argument unchanged.
  maxMemory = null

  // Resource labels
  withLabel: singlecpu { cpus = 1 }
  withLabel: lowcpu { cpus = 4 }
  withLabel: midcpu { cpus = 10 }
  withLabel: highcpu { cpus = 20 }

  // Memory labels scale with the attempt number and are passed through get_memory().
  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}

/**
 * Clamp a requested memory amount to process.maxMemory.
 *
 * - When process.maxMemory is missing or unset, the request is returned as-is.
 * - On the attempt whose number equals process.maxRetries, process.maxMemory
 *   itself is returned.
 * - Otherwise the request is returned unless it exceeds process.maxMemory, in
 *   which case process.maxMemory is returned as a MemoryUnit.
 *
 * Any exception raised while evaluating the limits prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * @param to_compare requested memory (the labels above pass `N.GB * task.attempt`)
 * @return the memory amount to use for the task
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; if it does not
    // resolve in this scope the catch block below aborts the run -- confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which threw and
      // turned every over-limit request into a fatal "Error processing memory
      // resources" exit instead of capping it.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
12
target/executable/annotate/celltypist/setup_logger.py
Normal file
12
target/executable/annotate/celltypist/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to emit INFO-level records on stdout.

    The root logger's level is set to INFO and a stream handler writing to
    ``sys.stdout`` is attached, formatted as
    ``<asctime> <levelname, padded to 8> <message>``.

    Calling this function more than once is safe: a stdout handler is only
    attached when the root logger does not already have one, so log lines are
    not duplicated.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Each call used to add one more stdout handler to the root logger, which
    # repeats every message once per call; reuse an existing one instead.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
|
||||
367
target/executable/annotate/onclass/.config.vsh.yaml
Normal file
367
target/executable/annotate/onclass/.config.vsh.yaml
Normal file
@@ -0,0 +1,367 @@
|
||||
name: "onclass"
|
||||
namespace: "annotate"
|
||||
version: "fix-integration-tests"
|
||||
authors:
|
||||
- name: "Jakub Majercik"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
email: "jakub@data-intuitive.com"
|
||||
github: "jakubmajercik"
|
||||
linkedin: "jakubmajercik"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Bioinformatics Engineer"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
description: "Input dataset (query) arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "The input (query) data to be labeled. Should be a .h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality to process."
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_layer"
|
||||
description: "The layer in the input data to be used for cell type annotation\
|
||||
\ if .X is not to be used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--cl_nlp_emb_file"
|
||||
description: "The .nlp.emb file with the cell type embeddings."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--cl_ontology_file"
|
||||
description: "The .ontology file with the cell type ontology."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--cl_obo_file"
|
||||
description: "The .obo file with the cell type ontology."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_query_gene_names"
|
||||
description: "The name of the adata var column in the input data containing gene\
|
||||
\ names; when not provided, the var index will be used.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Reference"
|
||||
description: "Arguments related to the reference dataset."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--reference"
|
||||
description: "The reference data to train the OnClass classifiers on. Only\
|
||||
\ required if a pre-trained --model is not provided."
|
||||
info: null
|
||||
example:
|
||||
- "reference.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_layer"
|
||||
description: "The layer in the reference data to be used for cell type annotation\
|
||||
\ if .X is not to be used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_obs_target"
|
||||
description: "The name of the adata obs column in the reference data containing\
|
||||
\ cell type annotations."
|
||||
info: null
|
||||
example:
|
||||
- "cell_ontology_class"
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
description: "Output arguments."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "output.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_obs_predictions"
|
||||
description: "In which `.obs` slots to store the predicted information.\n"
|
||||
info: null
|
||||
default:
|
||||
- "onclass_pred"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_obs_probability"
|
||||
description: "In which `.obs` slots to store the probability of the predictions.\n"
|
||||
info: null
|
||||
default:
|
||||
- "onclass_prob"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Model arguments"
|
||||
description: "Model arguments"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--model"
|
||||
description: "\"Pretrained model path without a file extension. If not provided,\
|
||||
\ the model will be trained \non the reference data and --reference should be\
|
||||
\ provided. The path namespace should contain:\n - a .npz or .pkl file\n -\
|
||||
\ a .data file\n - a .meta file\n - a .index file\ne.g. /path/to/model/pretrained_model_target1\
|
||||
\ as saved by OnClass.\"\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--max_iter"
|
||||
description: "Maximum number of iterations for training the model."
|
||||
info: null
|
||||
default:
|
||||
- 30
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "OnClass is a python package for single-cell cell type annotation. It\
|
||||
\ uses the Cell Ontology to capture the cell type similarity. \nThese similarities\
|
||||
\ enable OnClass to annotate cell types that are never seen in the training data.\n"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "annotation_test_data"
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3"
|
||||
- type: "file"
|
||||
path: "openpipelinetestutils"
|
||||
dest: "openpipelinetestutils"
|
||||
info: null
|
||||
status: "enabled"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.8"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "fix-integration-tests"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "scikit-learn==0.24.0"
|
||||
- "OnClass==1.2"
|
||||
- "tensorflow==2.13.1"
|
||||
- "obonet==1.1.0"
|
||||
- "mudata"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "docker"
|
||||
copy:
|
||||
- "openpipelinetestutils /opt/openpipelinetestutils"
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "/opt/openpipelinetestutils"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/annotate/onclass/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/annotate/onclass"
|
||||
executable: "target/executable/annotate/onclass/onclass"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "da62b4ffe30b6ef36fcb7ef5944f29d45d1138ff"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-1939-gda62b4ff"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "fix-integration-tests"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
|
||||
.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'fix-integration-tests'"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
42
target/executable/annotate/onclass/nextflow_labels.config
Normal file
42
target/executable/annotate/onclass/nextflow_labels.config
Normal file
@@ -0,0 +1,42 @@
|
||||
// Shared Nextflow resource presets: process-wide defaults, a retry policy for
// memory-related exit codes, and CPU/memory labels that components can request.
process {
  // Default resources for components that hardly do any processing
  memory = { 2.GB * task.attempt }
  cpus = 1

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
  // No ceiling by default; while this is null, get_memory() returns its argument unchanged.
  maxMemory = null

  // Resource labels
  withLabel: singlecpu { cpus = 1 }
  withLabel: lowcpu { cpus = 4 }
  withLabel: midcpu { cpus = 10 }
  withLabel: highcpu { cpus = 20 }

  // Memory labels scale with the attempt number and are passed through get_memory().
  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}

/**
 * Clamp a requested memory amount to process.maxMemory.
 *
 * - When process.maxMemory is missing or unset, the request is returned as-is.
 * - On the attempt whose number equals process.maxRetries, process.maxMemory
 *   itself is returned.
 * - Otherwise the request is returned unless it exceeds process.maxMemory, in
 *   which case process.maxMemory is returned as a MemoryUnit.
 *
 * Any exception raised while evaluating the limits prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * @param to_compare requested memory (the labels above pass `N.GB * task.attempt`)
 * @return the memory amount to use for the task
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; if it does not
    // resolve in this scope the catch block below aborts the run -- confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which threw and
      // turned every over-limit request into a fatal "Error processing memory
      // resources" exit instead of capping it.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
1613
target/executable/annotate/onclass/onclass
Executable file
1613
target/executable/annotate/onclass/onclass
Executable file
File diff suppressed because it is too large
Load Diff
12
target/executable/annotate/onclass/setup_logger.py
Normal file
12
target/executable/annotate/onclass/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to emit INFO-level records on stdout.

    The root logger's level is set to INFO and a stream handler writing to
    ``sys.stdout`` is attached, formatted as
    ``<asctime> <levelname, padded to 8> <message>``.

    Calling this function more than once is safe: a stdout handler is only
    attached when the root logger does not already have one, so log lines are
    not duplicated.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Each call used to add one more stdout handler to the root logger, which
    # repeats every message once per call; reuse an existing one instead.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
|
||||
387
target/executable/annotate/popv/.config.vsh.yaml
Normal file
387
target/executable/annotate/popv/.config.vsh.yaml
Normal file
@@ -0,0 +1,387 @@
|
||||
name: "popv"
|
||||
namespace: "annotate"
|
||||
version: "fix-integration-tests"
|
||||
authors:
|
||||
- name: "Matthias Beyens"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
github: "MatthiasBeyens"
|
||||
orcid: "0000-0003-3304-0706"
|
||||
email: "matthias.beyens@gmail.com"
|
||||
linkedin: "mbeyens"
|
||||
organizations:
|
||||
- name: "Janssen Pharmaceuticals"
|
||||
href: "https://www.janssen.com"
|
||||
role: "Principal Scientist"
|
||||
- name: "Robrecht Cannoodt"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "robrecht@data-intuitive.com"
|
||||
github: "rcannood"
|
||||
orcid: "0000-0003-3641-729X"
|
||||
linkedin: "robrechtcannoodt"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Science Engineer"
|
||||
- name: "Open Problems"
|
||||
href: "https://openproblems.bio"
|
||||
role: "Core Member"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
description: "Arguments related to the input (aka query) dataset."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality to process."
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_layer"
|
||||
description: "Which layer to use. If no value is provided, the counts are assumed\
|
||||
\ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[input_layer]`."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_obs_batch"
|
||||
description: "Key in obs field of input adata for batch information. If no value\
|
||||
\ is provided, batch label is assumed to be unknown."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_var_subset"
|
||||
description: "Subset the input object with this column."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_obs_label"
|
||||
description: "Key in obs field of input adata for label information. This is only\
|
||||
\ used for training scANVI. Unlabelled cells should be set to `\"unknown_celltype_label\"\
|
||||
`."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--unknown_celltype_label"
|
||||
description: "If `input_obs_label` is specified, cells with this value will be\
|
||||
\ treated as unknown and will be predicted by the model."
|
||||
info: null
|
||||
default:
|
||||
- "unknown"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Reference"
|
||||
description: "Arguments related to the reference dataset."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--reference"
|
||||
description: "User-provided reference tissue. The data that will be used as reference\
|
||||
\ to call cell types."
|
||||
info: null
|
||||
example:
|
||||
- "TS_Bladder_filtered.h5ad"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_layer"
|
||||
description: "Which layer to use. If no value is provided, the counts are assumed\
|
||||
\ to be in the `.X` slot. Otherwise, count data is expected to be in `.layers[reference_layer]`."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_obs_label"
|
||||
description: "Key in obs field of reference AnnData with cell-type information."
|
||||
info: null
|
||||
default:
|
||||
- "cell_ontology_class"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_obs_batch"
|
||||
description: "Key in obs field of reference AnnData for batch information."
|
||||
info: null
|
||||
default:
|
||||
- "donor_assay"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
description: "Output arguments."
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "output.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Arguments"
|
||||
description: "Other arguments."
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--methods"
|
||||
description: "Methods to call cell types. By default, runs knn_on_scvi and\
|
||||
\ scanvi."
|
||||
info: null
|
||||
example:
|
||||
- "knn_on_scvi"
|
||||
- "scanvi"
|
||||
required: true
|
||||
choices:
|
||||
- "celltypist"
|
||||
- "knn_on_bbknn"
|
||||
- "knn_on_scanorama"
|
||||
- "knn_on_scvi"
|
||||
- "onclass"
|
||||
- "rf"
|
||||
- "scanvi"
|
||||
- "svm"
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Performs popular major vote cell typing on single cell sequence data\
|
||||
\ using multiple algorithms. Note that this is a one-shot version of PopV."
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "annotation_test_data"
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3"
|
||||
- type: "file"
|
||||
path: "openpipelinetestutils"
|
||||
dest: "openpipelinetestutils"
|
||||
info: null
|
||||
status: "enabled"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "highmem"
|
||||
- "highcpu"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.9-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "fix-integration-tests"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
- "git"
|
||||
- "build-essential"
|
||||
- "wget"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "scanpy~=1.9.6"
|
||||
- "scvi-tools~=1.0.3"
|
||||
- "popv~=0.3.2"
|
||||
- "jax==0.4.10"
|
||||
- "jaxlib==0.4.10"
|
||||
- "ml-dtypes<0.3.0"
|
||||
- "scipy==1.12.0"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata==0.10.8"
|
||||
- "mudata~=0.2.4"
|
||||
- "pandas!=2.1.2"
|
||||
- "numpy<2.0.0"
|
||||
upgrade: true
|
||||
- type: "docker"
|
||||
run:
|
||||
- "cd /opt && git clone --depth 1 https://github.com/YosefLab/PopV.git && \\\n\
|
||||
\ cd PopV && git fetch --depth 1 origin tag v0.2 && git checkout v0.2\n"
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/annotate/popv/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/annotate/popv"
|
||||
executable: "target/executable/annotate/popv/popv"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "da62b4ffe30b6ef36fcb7ef5944f29d45d1138ff"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-1939-gda62b4ff"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "fix-integration-tests"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".test_resources += {path: '/src/base/openpipelinetestutils', dest: 'openpipelinetestutils'}\n\
|
||||
.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].directives.tag := '$id'\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'fix-integration-tests'"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
42
target/executable/annotate/popv/nextflow_labels.config
Normal file
42
target/executable/annotate/popv/nextflow_labels.config
Normal file
@@ -0,0 +1,42 @@
|
||||
// Shared Nextflow resource presets: process-wide defaults, a retry policy for
// memory-related exit codes, and CPU/memory labels that components can request.
process {
  // Default resources for components that hardly do any processing
  memory = { 2.GB * task.attempt }
  cpus = 1

  // Retry for exit codes that have something to do with memory issues
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
  // No ceiling by default; while this is null, get_memory() returns its argument unchanged.
  maxMemory = null

  // Resource labels
  withLabel: singlecpu { cpus = 1 }
  withLabel: lowcpu { cpus = 4 }
  withLabel: midcpu { cpus = 10 }
  withLabel: highcpu { cpus = 20 }

  // Memory labels scale with the attempt number and are passed through get_memory().
  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}

/**
 * Clamp a requested memory amount to process.maxMemory.
 *
 * - When process.maxMemory is missing or unset, the request is returned as-is.
 * - On the attempt whose number equals process.maxRetries, process.maxMemory
 *   itself is returned.
 * - Otherwise the request is returned unless it exceeds process.maxMemory, in
 *   which case process.maxMemory is returned as a MemoryUnit.
 *
 * Any exception raised while evaluating the limits prints a diagnostic and
 * terminates the JVM with exit code 1.
 *
 * @param to_compare requested memory (the labels above pass `N.GB * task.attempt`)
 * @return the memory amount to use for the task
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; if it does not
    // resolve in this scope the catch block below aborts the run -- confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which threw and
      // turned every over-limit request into a fatal "Error processing memory
      // resources" exit instead of capping it.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user