Files
openpipeline/src/workflows/multiomics/process_samples/test.nf
CI bb7533583f Build branch fix-integration-tests with version fix-integration-tests (da62b4ff)
Build pipeline: vsh-ci-dev-gckj5

Source commit: da62b4ffe3

Source message: Add labels to qc_test component
2024-11-15 14:37:33 +00:00

351 lines
11 KiB
Plaintext

// Use the DSL2 syntax for this script.
nextflow.enable.dsl=2
// Location of the built Nextflow targets, derived from the `rootDir` parameter.
targetDir = params.rootDir + "/target/nextflow"
// Helper components used by some tests to prepare their input before running the workflow under test.
include { remove_modality } from targetDir + '/filter/remove_modality/main.nf'
include { move_layer } from targetDir + '/transform/move_layer/main.nf'
// The workflow under test.
include { process_samples } from targetDir + "/workflows/multiomics/process_samples/main.nf"
// Directory holding the test resources; every test workflow resolves its input files against it.
params.resources_test = params.rootDir + "/resources_test"
// Runs process_samples on a mouse and a human sample that both request the
// same output file name, and checks that a single "merged" event comes out.
workflow test_wf {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // One state map per input sample.
  samples = [
    [
      id: "mouse",
      input: resources_test.resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
      publish_dir: "foo/",
      rna_min_counts: 2,
      output: "test.h5mu",
    ],
    [
      id: "human",
      input: resources_test.resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
      publish_dir: "foo/",
      rna_min_counts: 2,
      output: "test.h5mu",
    ]
  ]

  output_ch = Channel.fromList(samples)
    // Turn every state map into an [id, state] tuple.
    | map { sample -> [sample.id, sample] }
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith("test.h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    // Gather all events so the total number of events can be checked.
    | toSortedList()
    | map { events ->
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// Runs process_samples twice on the same PBMC file: once with a small set of
// arguments and once with an extended set of RNA/protein filtering arguments.
workflow test_wf2 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // One state map per input sample.
  samples = [
    [
      id: "pbmc",
      input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
      var_name_mitochondrial_genes: 'mitochondrial',
      rna_min_counts: 2,
      prot_min_counts: 3,
      add_id_to_obs: true,
      add_id_make_observation_keys_unique: true,
      add_id_obs_output: "sample_id"
    ],
    [
      id: "pbmc_with_more_args",
      input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
      rna_min_counts: 2,
      rna_max_counts: 1000000,
      rna_min_genes_per_cell: 1,
      rna_max_genes_per_cell: 1000000,
      rna_min_cells_per_gene: 1,
      rna_min_fraction_mito: 0.0,
      rna_max_fraction_mito: 1.0,
      prot_min_counts: 3,
      prot_max_counts: 1000000,
      prot_min_proteins_per_cell: 1,
      prot_max_proteins_per_cell: 1000000,
      prot_min_cells_per_protein: 1,
      var_name_mitochondrial_genes: 'mitochondrial',
      obs_name_mitochondrial_fraction: 'fraction_mitochondrial',
      add_id_to_obs: true,
      add_id_make_observation_keys_unique: true,
      add_id_obs_output: "sample_id"
    ],
  ]

  output_ch = Channel.fromList(samples)
    // Turn every state map into an [id, state] tuple.
    | map { sample -> [sample.id, sample] }
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    | toSortedList()
    | map { events ->
      // Whatever the number of input samples, the pipeline yields exactly one merged sample.
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// Runs process_samples on samples with differing modalities: the "atac"
// modality is removed from the human sample and the "rna" modality from the
// mouse sample before both are handed to the workflow.
workflow test_wf3 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // One state map per input sample.
  samples = [
    [
      id: "mouse",
      input: resources_test.resolve("concat_test_data/e18_mouse_brain_fresh_5k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
      publish_dir: "foo/",
      rna_min_counts: 2,
      var_qc_metrics: "highly_variable",
      highly_variable_features_var_output: "highly_variable",
    ],
    [
      id: "human",
      input: resources_test.resolve("concat_test_data/human_brain_3k_filtered_feature_bc_matrix_subset_unique_obs.h5mu"),
      publish_dir: "foo/",
      rna_min_counts: 2,
      var_qc_metrics: "highly_variable",
      highly_variable_features_var_output: "highly_variable",
    ]
  ]

  // Shared source channel of [id, state] tuples.
  input_ch = Channel.fromList(samples)
    | map { sample -> [sample.id, sample] }

  // Human sample with the "atac" modality removed; per the toState mapping the
  // component's `output` replaces the state's `input`.
  human_ch = input_ch
    | filter { event -> event[0] == "human" }
    | remove_modality.run(
      fromState: { id, state ->
        [
          "input": state.input,
          "modality": "atac"
        ]
      },
      toState: ["input": "output"]
    )

  // Mouse sample with the "rna" modality removed.
  mouse_ch = input_ch
    | filter { event -> event[0] == "mouse" }
    | remove_modality.run(
      fromState: { id, state ->
        [
          "input": state.input,
          "modality": "rna"
        ]
      },
      toState: ["input": "output"]
    )

  // Human events first, then mouse events, then the workflow under test.
  output_ch = human_ch.concat(mouse_ch)
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    | toSortedList()
    | map { events ->
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// Runs process_samples on a single CRISPR sample with `gdo_*` count
// thresholds set.
workflow test_wf4 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // A single state map; the list form is kept for Channel.fromList.
  samples = [
    [
      id: "10x_5k_lung_crispr",
      input: resources_test.resolve("10x_5k_lung_crispr/SC3_v3_NextGem_DI_CRISPR_A549_5K.h5mu"),
      rna_min_counts: 2,
      gdo_min_counts: 10,
      gdo_max_counts: 10000,
      add_id_to_obs: true,
      add_id_make_observation_keys_unique: true,
      add_id_obs_output: "sample_id"
    ]
  ]

  output_ch = Channel.fromList(samples)
    // Turn the state map into an [id, state] tuple.
    | map { sample -> [sample.id, sample] }
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    | toSortedList()
    | map { events ->
      // Whatever the number of input samples, the pipeline yields exactly one merged sample.
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// Runs process_samples with `rna_layer` pointing at a non-default layer. The
// layer is created beforehand by passing the rna modality through move_layer.
workflow test_wf5 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // NOTE(review): the id mentions "5k_human_antiCMV_T_TBNK" while the input is
  // the 10x_5k_lung_crispr file -- confirm whether the id is only a label.
  samples = [
    [
      id: "5k_human_antiCMV_T_TBNK_select_layer",
      input: resources_test.resolve("10x_5k_lung_crispr/SC3_v3_NextGem_DI_CRISPR_A549_5K.h5mu"),
      rna_min_counts: 2,
      rna_max_counts: 1000000,
      rna_min_genes_per_cell: 1,
      rna_max_genes_per_cell: 1000000,
      rna_min_cells_per_gene: 1,
      rna_min_fraction_mito: 0.0,
      rna_max_fraction_mito: 1.0,
      var_name_mitochondrial_genes: 'mitochondrial',
      obs_name_mitochondrial_fraction: 'fraction_mitochondrial',
      add_id_to_obs: true,
      add_id_make_observation_keys_unique: true,
      add_id_obs_output: "sample_id",
      rna_layer: "test_layer"
    ],
  ]

  output_ch = Channel.fromList(samples)
    // Turn the state map into an [id, state] tuple.
    | map { sample -> [sample.id, sample] }
    // Produce the "test_layer" layer in the rna modality; per the toState
    // mapping the component's `output` replaces the state's `input`.
    | move_layer.run(
      fromState: { id, state ->
        [
          "input": state.input,
          "output_layer": "test_layer",
          "modality": "rna"
        ]
      },
      toState: ["input": "output"]
    )
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    | toSortedList()
    | map { events ->
      // Whatever the number of input samples, the pipeline yields exactly one merged sample.
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// Runs process_samples on the same PBMC file with both values of `clr_axis`
// (0 and 1), so that each setting is exercised by one sample.
workflow test_wf6 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  output_ch = Channel.fromList([
      [
        id: "pbmc_clr_axis_0",
        input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
        // Matches the sample id; previously this was 1, which made both
        // samples identical and left the axis-0 setting untested.
        clr_axis: 0
      ],
      [
        id: "pbmc_clr_axis_1",
        input: resources_test.resolve("pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"),
        clr_axis: 1
      ]
    ])
    // Turn every state map into an [id, state] tuple.
    | map{ state -> [state.id, state] }
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    // Gather all events so the total number of events can be checked.
    | toSortedList()
    | map { output_list ->
      assert output_list.size() == 1 : "output channel should contain one event"
      assert output_list[0][0] == "merged" : "Output ID should be 'merged'"
    }
}
// The following test is supposed to fail. It is used to test the error handling of the pipeline.
// However, there is no way to catch the error originating from the workflow.
// workflow test_wf4 {
// helpMessage(config)
// // allow changing the resources_test dir
// params.resources_test = params.rootDir + "/resources_test"
// testParams = [
// param_list: [
// [
// id: "pbmc",
// input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu",
// output: "foo.h5mu"
// ],
// [
// id: "pbmc_with_more_args",
// input: params.resources_test + "/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu",
// output: "foo2.h5mu"
// ],
// ],
// ]
// output_ch = channelFromParams(testParams, config)
// | run_wf
// }
// Runs process_samples with `rna_enable_scaling` switched on and `rna_layer`
// pointing at a non-default layer. The layer is created beforehand by passing
// the rna modality through move_layer.
workflow test_wf7 {
  // Directory against which the input files are resolved.
  resources_test = file(params.resources_test)

  // NOTE(review): the id mentions "5k_human_antiCMV_T_TBNK" while the input is
  // the 10x_5k_lung_crispr file -- confirm whether the id is only a label.
  samples = [
    [
      id: "5k_human_antiCMV_T_TBNK_select_layer",
      input: resources_test.resolve("10x_5k_lung_crispr/SC3_v3_NextGem_DI_CRISPR_A549_5K.h5mu"),
      rna_min_counts: 2,
      rna_max_counts: 1000000,
      rna_min_genes_per_cell: 1,
      rna_max_genes_per_cell: 1000000,
      rna_min_cells_per_gene: 1,
      rna_min_fraction_mito: 0.0,
      rna_max_fraction_mito: 1.0,
      rna_enable_scaling: true,
      var_name_mitochondrial_genes: 'mitochondrial',
      obs_name_mitochondrial_fraction: 'fraction_mitochondrial',
      add_id_to_obs: true,
      add_id_make_observation_keys_unique: true,
      add_id_obs_output: "sample_id",
      rna_layer: "test_layer",
    ],
  ]

  output_ch = Channel.fromList(samples)
    // Turn the state map into an [id, state] tuple.
    | map { sample -> [sample.id, sample] }
    // Produce the "test_layer" layer in the rna modality; per the toState
    // mapping the component's `output` replaces the state's `input`.
    | move_layer.run(
      fromState: { id, state ->
        [
          "input": state.input,
          "output_layer": "test_layer",
          "modality": "rna"
        ]
      },
      toState: ["input": "output"]
    )
    | process_samples
    // Check each emitted event and print it.
    | view { output ->
      assert output.size() == 2 : "outputs should contain two elements; [id, file]"
      assert output[1].output.toString().endsWith(".h5mu") : "Output file should be a h5mu file. Found: ${output[1].output}"
      "Output: $output"
    }
    | toSortedList()
    | map { events ->
      // Whatever the number of input samples, the pipeline yields exactly one merged sample.
      assert events.size() == 1 : "output channel should contain one event"
      assert events[0][0] == "merged" : "Output ID should be 'merged'"
    }
}