# Source file: openpipeline/src/annotate/onclass/test.py

import sys
import os
import pytest
import subprocess
import re
import mudata as mu
import anndata as ad
from openpipelinetestutils.asserters import assert_annotation_objects_equal
import os
# Placeholder configuration so the module-level paths below can be built when
# this file is run directly; only the "resources_dir" key is read here.
# NOTE(review): presumably viash rewrites the region between the two markers
# when the component test is built — confirm against the viash documentation.
## VIASH START
meta = {
"resources_dir": "resources_test"
}
## VIASH END
# Test resources, all located under meta["resources_dir"].
# Query dataset passed to the component as --input.
input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
# Reference dataset passed as --reference.
reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
# Ontology files passed as --cl_nlp_emb_file, --cl_ontology_file and --cl_obo_file.
cl_nlp_emb_file = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.ontology.nlp.emb"
cl_ontology_file = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.ontology"
cl_obo_file = f"{meta['resources_dir']}/annotation_test_data/ontology/cl.obo"
# Model path passed as --model by test_pretrained_model.
model_file = f"{meta['resources_dir']}/annotation_test_data/onclass_model/example_file_model"
@pytest.fixture
def swap_gene_symbol(random_h5mu_path):
    """Provide a helper that re-indexes one modality's ``.var`` by gene symbol.

    The returned callable takes the path of a MuData file and a modality
    name. It copies that modality, stores the current ``.var`` index in an
    ``ensemblid`` column, replaces the index with the values of the
    ``gene_symbol`` column (a trailing ``.<digits>`` suffix is stripped),
    writes the modified MuData to a path obtained from ``random_h5mu_path``
    and returns that path.
    """
    def wrapper(input_mudata_file, modality):
        mdata = mu.read_h5mu(input_mudata_file)
        # Work on a copy so the modality object that was read is not mutated.
        swapped = mdata.mod[modality].copy()
        swapped.var["ensemblid"] = list(swapped.var.index)
        stripped_symbols = []
        for symbol in swapped.var["gene_symbol"]:
            stripped_symbols.append(re.sub("\\.[0-9]+$", "", symbol))
        swapped.var.index = stripped_symbols
        mdata.mod[modality] = swapped
        destination = random_h5mu_path()
        mdata.write_h5mu(destination)
        return destination

    return wrapper
def test_simple_execution(run_component, random_h5mu_path):
    """Run the component with a reference dataset and default output columns.

    Checks that the output file is written, that the ``prot`` modality is
    equal to the one in the input, that the ``rna`` modality's ``.obs`` holds
    exactly the columns ``onclass_pred`` and ``onclass_prob`` (in that
    order), and that every ``onclass_prob`` value lies in ``[0, 1]``.
    """
    output_file = random_h5mu_path()

    run_component([
        "--input", input_file,
        "--reference", reference_file,
        "--reference_obs_target", "cell_ontology_class",
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--max_iter", "10",
        "--output", output_file
    ])

    assert os.path.exists(output_file), "Output file does not exist"

    input_mudata = mu.read_h5mu(input_file)
    output_mudata = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(input_mudata.mod["prot"],
                                    output_mudata.mod["prot"])

    assert list(output_mudata.mod["rna"].obs.keys()) == ['onclass_pred',
                                                         'onclass_prob']

    obs_values = output_mudata.mod["rna"].obs["onclass_prob"]
    # The message names the column that is actually inspected (onclass_prob).
    assert all(0 <= value <= 1 for value in obs_values), \
        ".obs at onclass_prob has values outside the range [0, 1]"
def test_custom_obs(run_component, random_h5mu_path):
    """Run the component with user-chosen names for the output ``.obs`` columns.

    Passes ``dummy_pred_1`` / ``dummy_prob_1`` as prediction and probability
    column names, then checks that the output exists, that the ``prot``
    modality equals the input's, that the ``rna`` ``.obs`` columns are exactly
    those two names, and that the probability column stays within ``[0, 1]``.
    """
    output_file = random_h5mu_path()

    arguments = [
        "--input", input_file,
        "--reference", reference_file,
        "--reference_obs_target", "cell_ontology_class",
        "--output_obs_predictions", "dummy_pred_1",
        "--output_obs_probability", "dummy_prob_1",
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--max_iter", "10",
        "--output", output_file
    ]
    run_component(arguments)

    assert os.path.exists(output_file), "Output file does not exist"

    original = mu.read_h5mu(input_file)
    result = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(original.mod["prot"], result.mod["prot"])

    assert set(result.mod["rna"].obs.keys()) == {'dummy_pred_1', 'dummy_prob_1'}

    # Only the probability column is range-checked.
    for key in ('dummy_prob_1',):
        column = result.mod["rna"].obs[key]
        assert all(0 <= value <= 1 for value in column), f".obs at {key} has values outside the range [0, 1]"
def test_no_model_no_reference_error(run_component, random_h5mu_path):
    """Check that omitting both ``--model`` and ``--reference`` fails.

    The component is expected to exit with an error
    (``subprocess.CalledProcessError``) whose captured stdout contains the
    ``ValueError`` message asserted below.
    """
    output_file = random_h5mu_path()

    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component([
            "--input", input_file,
            "--output", output_file,
            "--cl_nlp_emb_file", cl_nlp_emb_file,
            "--cl_ontology_file", cl_ontology_file,
            "--cl_obo_file", cl_obo_file,
            "--reference_obs_target", "cell_ontology_class"
        ])

    # Checked after the ``with`` block: statements placed after the raising
    # call inside the block would never execute.
    expected_message = ("ValueError: Make sure to provide either 'model' or "
                        "'reference', but not both.")
    captured_output = err.value.stdout.decode('utf-8')
    # re.escape makes the trailing "." (and any other metacharacter) match
    # literally instead of acting as a regex wildcard.
    assert re.search(re.escape(expected_message), captured_output), \
        f"Expected error message not found in component output:\n{captured_output}"
def test_pretrained_model(run_component, random_h5mu_path, swap_gene_symbol):
    """Run the component with ``--model`` instead of a reference dataset.

    The ``rna`` modality of the input is first re-indexed through the
    ``swap_gene_symbol`` fixture, and the resulting file is used as
    ``--input``. Checks that the output file is written, that the ``prot``
    modality equals the one in the original input file, that the ``rna``
    ``.obs`` holds exactly ``onclass_pred`` and ``onclass_prob`` (in that
    order), and that every ``onclass_prob`` value lies in ``[0, 1]``.
    """
    output_file = random_h5mu_path()
    # NOTE(review): presumably the model file expects gene symbols as
    # var names, hence the swap — confirm against the model's training data.
    swapped_input_file = swap_gene_symbol(input_file, "rna")

    run_component([
        "--input", swapped_input_file,
        "--cl_nlp_emb_file", cl_nlp_emb_file,
        "--cl_ontology_file", cl_ontology_file,
        "--cl_obo_file", cl_obo_file,
        "--reference_obs_target", "cell_ontology_class",
        "--model", model_file,
        "--output", output_file
    ])

    assert os.path.exists(output_file), "Output file does not exist"

    # The "prot" modality is not touched by the gene-symbol swap, so it is
    # compared against the original (unswapped) input file.
    input_mudata = mu.read_h5mu(input_file)
    output_mudata = mu.read_h5mu(output_file)

    assert_annotation_objects_equal(input_mudata.mod["prot"],
                                    output_mudata.mod["prot"])

    assert list(output_mudata.mod["rna"].obs.keys()) == ['onclass_pred',
                                                         'onclass_prob']

    obs_values = output_mudata.mod["rna"].obs["onclass_prob"]
    # The message names the column that is actually inspected (onclass_prob).
    assert all(0 <= value <= 1 for value in obs_values), \
        ".obs at onclass_prob has values outside the range [0, 1]"
# Allow running this test module directly: hand this file to pytest and
# propagate pytest's return value as the process exit status.
if __name__ == '__main__':
    exit_code = pytest.main([__file__])
    sys.exit(exit_code)