Build pipeline: viash-hub.htrnaseq.project-experimentes-uniqueness-lcdnf
Source commit: 93c3200f38
Source message: Undo some changes
173 lines
9.3 KiB
Python
173 lines
9.3 KiB
Python
import pytest
|
|
import sys
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from uuid import uuid4
|
|
|
|
### VIASH START
# Placeholder configuration used when this test file is run outside of a
# viash build.
# NOTE(review): presumably viash replaces everything between the
# VIASH START / VIASH END markers at build time -- confirm against the
# viash documentation before relying on these values elsewhere.
meta = {
    # Directory in which the test input files are looked up.
    "resources_dir": "./src/eset/create_pdata/",
    # Location of the built component executable.
    "executable": "target/executable/eset/create_pdata/create_pdata",
    # Location of the component's viash configuration file.
    "config": "src/eset/create_pdata/config.vsh.yaml",
}
### VIASH END
|
|
|
|
@pytest.fixture
def test_reads_and_genes_per_chr_path():
    """Return the path to ``nrReadsNrGenesPerChromPool.txt``.

    The file is looked up inside the directory named by
    ``meta["resources_dir"]``.
    """
    resources_dir = Path(meta["resources_dir"])
    return resources_dir.joinpath("nrReadsNrGenesPerChromPool.txt")
|
|
|
|
|
|
@pytest.fixture
def test_star_logs_summary_path():
    """Return the path to ``starLogs.txt``.

    The file is looked up inside the directory named by
    ``meta["resources_dir"]``.
    """
    resources_dir = Path(meta["resources_dir"])
    return resources_dir.joinpath("starLogs.txt")
|
|
|
|
|
|
@pytest.fixture
def random_path(tmp_path):
    """Provide a factory that builds unique paths below ``tmp_path``.

    The returned callable takes an optional ``extension`` (given without
    the leading dot) and returns ``tmp_path / "<uuid4>[.<extension>]"``.
    It only builds the path; nothing is created on disk.
    """
    def make_path(extension=None):
        # A falsy extension (None or "") yields a name without a suffix.
        suffix = f".{extension}" if extension else ""
        return tmp_path / f"{uuid4()}{suffix}"

    return make_path
|
|
|
|
|
|
def test_create_pdata(run_component, test_reads_and_genes_per_chr_path,
                      test_star_logs_summary_path, random_path):
    """Run the component on the reference inputs and compare the full table.

    The output is read back with every column as a pandas string so that the
    comparison is made on the exact text written by the component.
    """
    # One entry per output column; each list holds the values of the seven
    # wells, in the order of the 'WellBC' column.
    expected_columns = {
        "WellBC": [
            "AACAAGGTAC", "ACGCCTTCGT", "CCATACTGAC", "GCAAGCGAAT",
            "GTCTCGAGTG", "TGCGCTCATT", "TTGTGTTCGA",
        ],
        "WellID": ["A1", "A2", "A3", "B1", "C5", "D6", "E19"],
        "NumberOfMTReads": ["0", "0", "0", "0", "0", "0", "0"],
        "pctMT": ["0", "0", "0", "0", "0", "0", "0"],
        "NumberOfERCCReads": ["0", "0", "0", "0", "0", "0", "0"],
        "pctERCC": ["0", "0", "0", "0", "0", "0", "0"],
        "NumberOfChromReads": [
            "8542", "5863", "7396", "10092", "470", "7650", "9422",
        ],
        "pctChrom": ["100", "100", "100", "100", "100", "100", "100"],
        "NumberOfInputReads": [
            "141303", "96430", "113577", "156134", "10158", "126989",
            "142560",
        ],
        "NumberOfMappedReads": [
            "23749", "16869", "17319", "24005", "1902", "19272", "22129",
        ],
        "PctMappedReads": [
            "16.81", "17.49", "15.25", "15.37", "18.72", "15.18", "15.52",
        ],
        "NumberOfReadsMappedToMultipleLoci": [
            "0", "0", "0", "0", "0", "0", "0",
        ],
        "PectOfReadsMappedToMultipleLoci": [
            "0", "0", "0", "0", "0", "0", "0",
        ],
        "NumberOfReadsMappedToTooManyLoci": [
            "8458", "6124", "5905", "7961", "967", "7141", "7045",
        ],
        "PectOfReadsMappedToTooManyLoci": [
            "5.99", "6.35", "5.2", "5.1", "9.52", "5.62", "4.94",
        ],
        "NumberOfReadsUnmappedTooManyMismatches": [
            "0", "0", "0", "0", "0", "0", "0",
        ],
        "PectOfReadsUnmappedTooManyMismatches": [
            "0", "0", "0", "0", "0", "0", "0",
        ],
        "NumberOfReadsUnmappedTooShort": [
            "109035", "73375", "90292", "124096", "7280", "100515", "113324",
        ],
        "PectOfReadsUnmappedTooShort": [
            "77.16", "76.09", "79.5", "79.48", "71.67", "79.15", "79.49",
        ],
        "NumberOfReadsUnmappedOther": [
            "61", "62", "61", "72", "9", "61", "62",
        ],
        "PectOfReadsUnmappedOther": [
            "0.04", "0.06", "0.05", "0.05", "0.09", "0.05", "0.04",
        ],
        "ReadsWithValidBarcodes": [
            "0.999816", "0.999782", "0.999859", "0.999744", "0.999803",
            "0.999843", "0.999783",
        ],
        "SequencingSaturation": [
            "0.0698056", "0.0665302", "0.0717282", "0.0680872", "0.0553191",
            "0.0667974", "0.060828",
        ],
        "Q30BasesInCB+UMI": [
            "0.979965", "0.980077", "0.982313", "0.982779", "0.984451",
            "0.986581", "0.986622",
        ],
        "ReadsMappedToTranscriptome:Unique+MultipeGenes": [
            "0.0618175", "0.0620969", "0.066554", "0.0658665", "0.0476472",
            "0.0616668", "0.0676838",
        ],
        "EstimatedNumberOfCells": ["1", "1", "1", "1", "1", "1", "1"],
        "FractionOfReadsInCells": ["1", "1", "1", "1", "1", "1", "1"],
        "MeanReadsPerCell": [
            "8538", "5862", "7389", "10090", "470", "7650", "9420",
        ],
        "NumberOfUMIs": [
            "7942", "5472", "6859", "9403", "444", "7139", "8847",
        ],
        "NumberOfGenes": ["408", "377", "391", "420", "150", "407", "420"],
        "NumberOfCountedReads": [
            "9535", "6463", "8299", "11273", "533", "8444", "10383",
        ],
    }
    string_dtype = pd.StringDtype()
    expected = pd.DataFrame.from_dict(expected_columns, dtype=string_dtype)

    out_file = random_path("tsv")
    component_args = [
        "--star_stats_file", test_star_logs_summary_path,
        "--nrReadsNrGenesPerChromPool", test_reads_and_genes_per_chr_path,
        "--output", out_file,
    ]
    run_component(component_args)
    assert out_file.is_file()

    observed = pd.read_csv(out_file, sep="\t", dtype=string_dtype)
    # check_like=True: the order of rows and columns is not significant.
    pd.testing.assert_frame_equal(observed, expected, check_like=True)
|
|
|
|
def test_na(run_component, test_reads_and_genes_per_chr_path,
            test_star_logs_summary_path, random_path):
    """
    The star log summary can contain NA values.

    A copy of the reference STAR log summary is written with the
    'FractionOfReadsInCells' value of one well replaced by NA. The component
    is run on that copy and the whole output table is compared: the modified
    cell must be NA, and the other values of that column are expected as
    '1.0' instead of '1'.
    NOTE(review): the '1.0' rendering is presumably caused by the column
    becoming floating point once it holds a missing value -- confirm.
    """
    output_path = random_path("tsv")
    summary_with_na_path = random_path("txt")
    na_barcode = "GTCTCGAGTG"
    na_column = "FractionOfReadsInCells"

    original_summary = pd.read_csv(test_star_logs_summary_path,
                                   sep="\t", index_col=0)
    # Assigning through .loc with an unknown label does not raise: pandas
    # enlarges the frame with a new row/column instead. Check the labels
    # first so that a changed fixture file fails here, at the cause.
    assert na_barcode in original_summary.index, \
        f"Barcode {na_barcode} not found in {test_star_logs_summary_path}"
    assert na_column in original_summary.columns, \
        f"Column {na_column} not found in {test_star_logs_summary_path}"
    original_summary.loc[na_barcode, na_column] = pd.NA
    original_summary.reset_index("WellBC").to_csv(summary_with_na_path,
                                                  sep="\t", header=True,
                                                  index=False)
    run_component([
        "--star_stats_file", summary_with_na_path,
        "--nrReadsNrGenesPerChromPool", test_reads_and_genes_per_chr_path,
        "--output", output_path
    ])
    # Same check as in test_create_pdata: fail clearly when nothing was
    # written instead of failing later while reading the file.
    assert output_path.is_file()
    expected_dict = {
        'WellBC': ['AACAAGGTAC', 'ACGCCTTCGT', 'CCATACTGAC', 'GCAAGCGAAT',
                   'GTCTCGAGTG', 'TGCGCTCATT', 'TTGTGTTCGA'],
        'WellID': ['A1', 'A2', 'A3', 'B1', 'C5', 'D6', 'E19'],
        'NumberOfMTReads': ['0', '0', '0', '0', '0', '0', '0'],
        'pctMT': ['0', '0', '0', '0', '0', '0', '0'],
        'NumberOfERCCReads': ['0', '0', '0', '0', '0', '0', '0'],
        'pctERCC': ['0', '0', '0', '0', '0', '0', '0'],
        'NumberOfChromReads': ['8542', '5863', '7396', '10092', '470',
                               '7650', '9422'],
        'pctChrom': ['100', '100', '100', '100', '100', '100', '100'],
        'NumberOfInputReads': ['141303', '96430', '113577', '156134', '10158',
                               '126989', '142560'],
        'NumberOfMappedReads': ['23749', '16869', '17319', '24005', '1902',
                                '19272', '22129'],
        'PctMappedReads': ['16.81', '17.49', '15.25', '15.37', '18.72',
                           '15.18', '15.52'],
        'NumberOfReadsMappedToMultipleLoci': ['0', '0', '0', '0', '0', '0', '0'],
        'PectOfReadsMappedToMultipleLoci': ['0', '0', '0', '0', '0', '0', '0'],
        'NumberOfReadsMappedToTooManyLoci': ['8458', '6124', '5905', '7961', '967',
                                             '7141', '7045'],
        'PectOfReadsMappedToTooManyLoci': ['5.99', '6.35', '5.2', '5.1', '9.52',
                                           '5.62', '4.94'],
        'NumberOfReadsUnmappedTooManyMismatches': ['0', '0', '0', '0', '0', '0', '0'],
        'PectOfReadsUnmappedTooManyMismatches': ['0', '0', '0', '0', '0', '0', '0'],
        'NumberOfReadsUnmappedTooShort': ['109035', '73375', '90292', '124096',
                                          '7280', '100515', '113324'],
        'PectOfReadsUnmappedTooShort': ['77.16', '76.09', '79.5', '79.48',
                                        '71.67', '79.15', '79.49'],
        'NumberOfReadsUnmappedOther': ['61', '62', '61', '72', '9', '61', '62'],
        'PectOfReadsUnmappedOther': ['0.04', '0.06', '0.05', '0.05',
                                     '0.09', '0.05', '0.04'],
        'ReadsWithValidBarcodes': ['0.999816', '0.999782', '0.999859', '0.999744',
                                   '0.999803', '0.999843', '0.999783'],
        'SequencingSaturation': ['0.0698056', '0.0665302', '0.0717282', '0.0680872',
                                 '0.0553191', '0.0667974', '0.060828'],
        'Q30BasesInCB+UMI': ['0.979965', '0.980077', '0.982313', '0.982779',
                             '0.984451', '0.986581', '0.986622'],
        'ReadsMappedToTranscriptome:Unique+MultipeGenes': ['0.0618175', '0.0620969',
                                                           '0.066554', '0.0658665',
                                                           '0.0476472', '0.0616668',
                                                           '0.0676838'],
        'EstimatedNumberOfCells': ['1', '1', '1', '1', '1', '1', '1'],
        # The fifth well (GTCTCGAGTG) is the one that was set to NA above.
        'FractionOfReadsInCells': ['1.0', '1.0', '1.0', '1.0', pd.NA, '1.0', '1.0'],
        'MeanReadsPerCell': ['8538', '5862', '7389',
                             '10090', '470', '7650', '9420'],
        'NumberOfUMIs': ['7942', '5472', '6859', '9403',
                         '444', '7139', '8847'],
        'NumberOfGenes': ['408', '377', '391', '420', '150', '407', '420'],
        'NumberOfCountedReads': ['9535', '6463', '8299', '11273',
                                 '533', '8444', '10383']
    }
    # Read everything as strings so the comparison is on the written text.
    result = pd.read_csv(output_path, sep="\t", dtype=pd.StringDtype())
    expected = pd.DataFrame.from_dict(expected_dict, dtype=pd.StringDtype())
    # check_like=True: the order of rows and columns is not significant.
    pd.testing.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the tests of this file only and hand pytest's return value to
    # the calling process as the exit status.
    exit_code = pytest.main([__file__])
    sys.exit(exit_code)