Files
openpipeline/src/annotate/onclass/test.py
CI cd0af18851 Build branch fix-integration-tests with version dev (2dbe3b72)
Build pipeline: vsh-ci-dev-k8tz4

Source commit: 2dbe3b7231

Source message: Fix pointers to test resources
2024-10-17 17:56:12 +00:00

146 lines
5.4 KiB
Python

import sys
import os
import pytest
import subprocess
import re
import mudata as mu
import anndata as ad
from openpipelinetestutils.asserters import assert_annotation_objects_equal
import os
## VIASH START
# Stand-in configuration so the paths below resolve to the local
# "resources_test" directory.
# NOTE(review): the section between the VIASH START/END markers is presumably
# substituted by viash when the component is built -- confirm before relying
# on anything placed inside it.
meta = {"resources_dir": "resources_test"}
## VIASH END

# Query dataset handed to the component through --input.
input_file: str = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"

# Reference dataset handed to the component through --reference.
reference_file: str = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"

# Ontology resources handed to the component through --cl_nlp_emb_file,
# --cl_ontology_file and --cl_obo_file respectively.
cl_nlp_emb_file: str = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.ontology.nlp.emb"
cl_ontology_file: str = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.ontology"
cl_obo_file: str = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.obo"

# Model location handed to the component through --model.
model_file: str = f"{meta['resources_dir']}/annotation_test_data/onclass_model/example_file_model"
@pytest.fixture
def swap_gene_symbol(random_h5mu_path):
    """Provide a helper that re-indexes one modality's ``.var`` by gene symbol.

    The returned callable reads a MuData file, stores the current ``.var``
    index of the requested modality in an ``ensemblid`` column, replaces the
    index with the values of the ``gene_symbol`` column (with any trailing
    ``.<digits>`` suffix removed) and writes the result to a fresh path
    obtained from ``random_h5mu_path``.
    """
    # Matches a trailing ".<digits>" suffix on a gene symbol.
    trailing_number = re.compile("\\.[0-9]+$")

    def wrapper(input_mudata_file, modality):
        """Write the re-indexed copy of ``input_mudata_file`` and return its path."""
        mdata = mu.read_h5mu(input_mudata_file)

        # Modify a copy of the modality and put it back afterwards.
        modality_data = mdata.mod[modality].copy()
        modality_data.var["ensemblid"] = list(modality_data.var.index)
        modality_data.var.index = [
            trailing_number.sub("", symbol)
            for symbol in modality_data.var["gene_symbol"]
        ]
        mdata.mod[modality] = modality_data

        destination = random_h5mu_path()
        mdata.write_h5mu(destination)
        return destination

    return wrapper
def test_simple_execution(run_component, random_h5mu_path):
    """Run the component against a reference dataset with default output names.

    Verifies that the output file is written, that the ``prot`` modality of
    the output equals that of the input, that the ``rna`` modality's ``.obs``
    holds exactly the ``onclass_pred`` and ``onclass_prob`` columns (in that
    order), and that every ``onclass_prob`` value lies in [0, 1].
    """
    output_file = random_h5mu_path()

    run_component([
        "--input", input_file,
        "--reference", reference_file,
        "--reference_obs_target", "cell_ontology_class",
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--max_iter", "10",
        "--output", output_file
    ])

    assert os.path.exists(output_file), "Output file does not exist"

    input_mudata = mu.read_h5mu(input_file)
    output_mudata = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(input_mudata.mod["prot"],
                                    output_mudata.mod["prot"])

    assert list(output_mudata.mod["rna"].obs.keys()) == ['onclass_pred',
                                                         'onclass_prob']

    obs_values = output_mudata.mod["rna"].obs["onclass_prob"]
    # The message names the column that is actually checked (onclass_prob),
    # so a failure points at the right .obs field.
    assert all(0 <= value <= 1 for value in obs_values), ".obs at onclass_prob has values outside the range [0, 1]"
def test_custom_obs(run_component, random_h5mu_path):
    """Run the component with user-chosen names for the output ``.obs`` columns.

    Verifies that the output file is written, that the ``prot`` modality of
    the output equals that of the input, that the ``rna`` modality's ``.obs``
    holds exactly the two requested columns, and that the probability column
    only contains values in [0, 1].
    """
    output_file = random_h5mu_path()
    cli_args = [
        "--input", input_file,
        "--reference", reference_file,
        "--reference_obs_target", "cell_ontology_class",
        "--output_obs_predictions", "dummy_pred_1",
        "--output_obs_probability", "dummy_prob_1",
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--max_iter", "10",
        "--output", output_file,
    ]
    run_component(cli_args)

    assert os.path.exists(output_file), "Output file does not exist"

    source_mdata = mu.read_h5mu(input_file)
    result_mdata = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(source_mdata.mod["prot"],
                                    result_mdata.mod["prot"])

    rna_obs = result_mdata.mod["rna"].obs
    assert set(rna_obs.keys()) == {'dummy_pred_1', 'dummy_prob_1'}

    # Only the probability column is range-checked.
    for key in ('dummy_prob_1',):
        probabilities = rna_obs[key]
        assert all(0 <= probability <= 1 for probability in probabilities), f".obs at {key} has values outside the range [0, 1]"
def test_no_model_no_reference_error(run_component, random_h5mu_path):
    """Run the component with neither ``--model`` nor ``--reference``.

    Expects the run to raise ``subprocess.CalledProcessError`` and the
    captured stdout to contain the ValueError message about providing either
    'model' or 'reference'.
    """
    output_file = random_h5mu_path()
    cli_args = [
        "--input", input_file,
        "--output", output_file,
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--reference_obs_target", "cell_ontology_class",
    ]

    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component(cli_args)

    # Inspect the captured output only after the context manager has recorded
    # the exception.
    captured_stdout = err.value.stdout.decode('utf-8')
    assert re.search(
        r"ValueError: Make sure to provide either 'model' or 'reference', but not both.",
        captured_stdout,
    )
def test_pretrained_model(run_component, random_h5mu_path, swap_gene_symbol):
    """Run the component with a pretrained model instead of a reference.

    The ``rna`` modality of the input is first re-indexed through the
    ``swap_gene_symbol`` fixture, and that rewritten file is what the
    component receives. Verifies that the output file is written, that the
    ``prot`` modality of the output equals that of the original input, that
    the ``rna`` modality's ``.obs`` holds exactly the ``onclass_pred`` and
    ``onclass_prob`` columns (in that order), and that every ``onclass_prob``
    value lies in [0, 1].
    """
    output_file = random_h5mu_path()
    swapped_input_file = swap_gene_symbol(input_file, "rna")

    run_component([
        "--input", swapped_input_file,
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--reference_obs_target", "cell_ontology_class",
        "--model", model_file,
        "--output", output_file
    ])

    assert os.path.exists(output_file), "Output file does not exist"

    # The comparison baseline is the original input file, not the swapped one.
    input_mudata = mu.read_h5mu(input_file)
    output_mudata = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(input_mudata.mod["prot"],
                                    output_mudata.mod["prot"])

    assert list(output_mudata.mod["rna"].obs.keys()) == ['onclass_pred',
                                                         'onclass_prob']

    obs_values = output_mudata.mod["rna"].obs["onclass_prob"]
    # The message names the column that is actually checked (onclass_prob),
    # so a failure points at the right .obs field.
    assert all(0 <= value <= 1 for value in obs_values), ".obs at onclass_prob has values outside the range [0, 1]"
if __name__ == '__main__':
    # When executed as a script, run pytest on this file and use its result
    # as the process exit status.
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)