Files
biobox/src/bedtools/bedtools_intersect/test.sh
CI 6df312e701 Build branch gatk_applybqsrspark with version gatk_applybqsrspark (c4ea23a)
Build pipeline: viash-hub.biobox.gatk-applybqsrspark-jpq6r

Source commit: c4ea23a0f5

Source message: Add RSEM prepare reference component (#89)

* initial commit

* incorporate some requested changes

* update test

* change argument reference_fasta_files to multiple true and update docker setup

* Update src/rsem/rsem_prepare_reference/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/rsem/rsem_prepare_reference/config.vsh.yaml

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* Update src/rsem/rsem_prepare_reference/script.sh

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>

* set multiple true

* update changelog

* Apply suggestions from code review

* fix script

---------

Co-authored-by: Robrecht Cannoodt <rcannood@gmail.com>
2024-09-01 17:36:53 +00:00

341 lines
11 KiB
Bash

#!/bin/bash
# Test script for the bedtools_intersect component: it writes small BED/GFF
# fixtures, runs the executable with various options and compares every
# result with an expected file.
# exit on error: any failing command (including a failed assertion helper)
# aborts the whole script with a non-zero status
set -e
## VIASH START
# Paths to the executable under test and to the component's resource directory.
# NOTE(review): presumably viash replaces the section between the VIASH
# START/END markers when it runs the test -- confirm against the viash docs.
meta_executable="target/executable/bedtools/bedtools_intersect/bedtools_intersect"
meta_resources_dir="src/bedtools/bedtools_intersect"
## VIASH END
#############################################
# helper functions
# Abort the script with status 1 unless "$1" is an existing regular file.
# Arguments: $1 - path to check
# Outputs:   an explanatory message on stdout when the check fails
assert_file_exists() {
  if [ ! -f "$1" ]; then
    echo "File '$1' does not exist"
    exit 1
  fi
}
# Abort the script with status 1 unless "$1" exists and has a size > 0.
# Arguments: $1 - path to check
# Outputs:   an explanatory message on stdout when the check fails
assert_file_not_empty() {
  if [ ! -s "$1" ]; then
    echo "File '$1' is empty but shouldn't be"
    exit 1
  fi
}
# Abort the script with status 1 unless file "$1" has a line matching "$2".
# Arguments: $1 - file to search
#            $2 - grep (basic regular expression) pattern
# Outputs:   an explanatory message on stdout when the check fails
assert_file_contains() {
  # -e marks "$2" as the pattern and -- ends option parsing, so a pattern or
  # file name that starts with '-' is not mistaken for a grep option.
  if ! grep -q -e "$2" -- "$1"; then
    echo "File '$1' does not contain '$2'"
    exit 1
  fi
}
# Abort the script with status 1 unless the two files have identical content.
# Arguments: $1 - produced file
#            $2 - expected file
# Outputs:   the diff (expected vs. produced) followed by an explanatory
#            message on stdout when the files differ
assert_identical_content() {
  # -a makes diff treat both files as text.
  if ! diff -a "$2" "$1"; then
    echo "Files are not identical!"
    # Exit from the current shell (not from a subshell) so the script stops
    # here even when the caller does not run under 'set -e'.
    exit 1
  fi
}
#############################################
# Create the fixture directory and every input / expected file used below.
# Each file is opened with a truncating redirect, so stale content from an
# earlier run is always replaced rather than appended to.
echo "Creating Test Data..."
mkdir -p test_data

# BED inputs: three tab-separated columns, three intervals on chr1 per file.
printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/featuresA.bed"
printf "chr1\t180\t280\nchr1\t290\t390\nchr1\t500\t600\n" > "test_data/featuresB.bed"
printf "chr1\t120\t220\nchr1\t250\t350\nchr1\t500\t580\n" > "test_data/featuresC.bed"

# GFF inputs: a version header followed by gene / mRNA / exon / CDS records.
# example1.gff
{
  printf "##gff-version 3\n"
  printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n"
  printf "chr1\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n"
  printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n"
  printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n"
} > "test_data/example1.gff"
# example2.gff
{
  printf "##gff-version 3\n"
  printf "chr1\t.\tgene\t1200\t1800\t.\t-\t.\tID=gene2;Name=Gene2\n"
  printf "chr1\t.\tmRNA\t1400\t2000\t.\t-\t.\tID=transcript2;Parent=gene2\n"
  printf "chr1\t.\texon\t1400\t2000\t.\t-\t.\tID=exon3;Parent=transcript2\n"
  printf "chr1\t.\texon\t1600\t2000\t.\t-\t.\tID=exon4;Parent=transcript2\n"
  printf "chr1\t.\tCDS\t3000\t3200\t.\t-\t1\tID=cds3;Parent=transcript2\n"
  printf "chr1\t.\tCDS\t3500\t3700\t.\t-\t0\tID=cds4;Parent=transcript2\n"
} > "test_data/example2.gff"

# Expected BED outputs, one per test (test 1 .. test 12, in order).
printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_default.bed"
printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_wa.bed"
printf "chr1\t180\t200\tchr1\t180\t280\nchr1\t180\t250\tchr1\t180\t280\nchr1\t300\t390\tchr1\t290\t390\n" > "test_data/expected_wb.bed"
printf "chr1\t100\t200\tchr1\t180\t280\nchr1\t150\t250\tchr1\t180\t280\nchr1\t300\t400\tchr1\t290\t390\n" > "test_data/expected_loj.bed"
printf "chr1\t100\t200\tchr1\t180\t280\t20\nchr1\t150\t250\tchr1\t180\t280\t70\nchr1\t300\t400\tchr1\t290\t390\t90\n" > "test_data/expected_wo.bed"
printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_u.bed"
printf "chr1\t100\t200\t1\nchr1\t150\t250\t1\nchr1\t300\t400\t1\n" > "test_data/expected_c.bed"
printf "chr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f50.bed"
printf "chr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f30.bed"
printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f10.bed"
printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_r.bed"
printf "chr1\t180\t200\nchr1\t120\t200\nchr1\t180\t250\nchr1\t150\t220\nchr1\t300\t390\nchr1\t300\t350\n" > "test_data/expected_multiple.bed"

# Expected GFF output (test 13). Written through one truncating redirect:
# appending the first record with '>>' would duplicate the content whenever
# the file already exists.
{
  printf "chr1\t.\tgene\t1200\t1800\t.\t+\t.\tID=gene1;Name=Gene1\n"
  printf "chr1\t.\tgene\t1400\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n"
  printf "chr1\t.\tgene\t1400\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n"
  printf "chr1\t.\tgene\t1600\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n"
  printf "chr1\t.\tmRNA\t1200\t1800\t.\t+\t.\tID=transcript1;Parent=gene1\n"
  printf "chr1\t.\tmRNA\t1400\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n"
  printf "chr1\t.\tmRNA\t1400\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n"
  printf "chr1\t.\tmRNA\t1600\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n"
  printf "chr1\t.\texon\t1200\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n"
  printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n"
  printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n"
  printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n"
  printf "chr1\t.\texon\t1600\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1200\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n"
  printf "chr1\t.\tCDS\t1600\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n"
} > "test_data/expected.gff"
# Test 1: intersect A with B without any optional flags
mkdir test1
cd test1
echo "> Run bedtools_intersect on BED files with default intersect"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_default.bed"
echo "- test1 succeeded -"
cd ..
# Test 2: -wa option (passed as --write_a)
mkdir test2
cd test2
echo "> Run bedtools_intersect on BED files with -wa option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --write_a
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_wa.bed"
echo "- test2 succeeded -"
cd ..
# Test 3: -wb option (passed as --write_b)
mkdir test3
cd test3
echo "> Run bedtools_intersect on BED files with -wb option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --write_b
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_wb.bed"
echo "- test3 succeeded -"
cd ..
# Test 4: -loj option (passed as --left_outer_join)
mkdir test4
cd test4
echo "> Run bedtools_intersect on BED files with -loj option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --left_outer_join
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_loj.bed"
echo "- test4 succeeded -"
cd ..
# Test 5: -wo option (passed as --write_overlap)
mkdir test5
cd test5
echo "> Run bedtools_intersect on BED files with -wo option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --write_overlap
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_wo.bed"
echo "- test5 succeeded -"
cd ..
# Test 6: -u option (passed as --report_A_if_no_overlap true)
mkdir test6
cd test6
echo "> Run bedtools_intersect on BED files with -u option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --report_A_if_no_overlap true
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_u.bed"
echo "- test6 succeeded -"
cd ..
# Test 7: -c option (passed as --number_of_overlaps_A true)
mkdir test7
cd test7
echo "> Run bedtools_intersect on BED files with -c option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --number_of_overlaps_A true
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_c.bed"
echo "- test7 succeeded -"
cd ..
# Test 8: -f 0.50 option (passed as --min_overlap_A 0.50)
mkdir test8
cd test8
echo "> Run bedtools_intersect on BED files with -f 0.50 option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --min_overlap_A 0.50
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_f50.bed"
echo "- test8 succeeded -"
cd ..
# Test 9: -f 0.30 option (passed as --min_overlap_A 0.30)
mkdir test9
cd test9
echo "> Run bedtools_intersect on BED files with -f 0.30 option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --min_overlap_A 0.30
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_f30.bed"
echo "- test9 succeeded -"
cd ..
# Test 10: -f 0.10 option (passed as --min_overlap_A 0.10)
mkdir test10
cd test10
echo "> Run bedtools_intersect on BED files with -f 0.10 option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --min_overlap_A 0.10
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_f10.bed"
echo "- test10 succeeded -"
cd ..
# Test 11: -r option (passed as --reciprocal_overlap true)
mkdir test11
cd test11
echo "> Run bedtools_intersect on BED files with -r option"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --output "output.bed"
  --reciprocal_overlap true
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_r.bed"
echo "- test11 succeeded -"
cd ..
# Test 12: several B files (--input_b given twice)
mkdir test12
cd test12
echo "> Run bedtools_intersect on multiple BED files"
intersect_args=(
  --input_a "../test_data/featuresA.bed"
  --input_b "../test_data/featuresB.bed"
  --input_b "../test_data/featuresC.bed"
  --output "output.bed"
)
"$meta_executable" "${intersect_args[@]}"
# the result must exist, be non-empty and match the expected fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected_multiple.bed"
echo "- test12 succeeded -"
cd ..
# Test 13: GFF file format (both inputs are GFF files)
mkdir test13
cd test13
echo "> Run bedtools_intersect on GFF files"
intersect_args=(
  --input_a "../test_data/example1.gff"
  --input_b "../test_data/example2.gff"
  --output "output.bed"
)
"$meta_executable" "${intersect_args[@]}"
# the result is written to output.bed and compared with the GFF fixture
assert_file_exists "output.bed"
assert_file_not_empty "output.bed"
assert_identical_content "output.bed" "../test_data/expected.gff"
echo "- test13 succeeded -"
cd ..
# Every test above ran to completion: report success and exit cleanly.
printf '%s\n' "---- All tests succeeded! ----"
exit 0