Build branch main_pipeline with version main_pipeline (8914244)

Build pipeline: viash-hub.htrnaseq.main-pipeline-2ljsp

Source commit: 89142447a2

Source message: Merge remote-tracking branch 'origin/main_pipeline' into main_pipeline
This commit is contained in:
CI
2024-08-20 08:55:04 +00:00
parent 749e79dfb1
commit 3e082c8293
21 changed files with 664 additions and 108 deletions

View File

@@ -7,7 +7,7 @@ links:
issue_tracker: https://github.com/viash-hub/htrnaseq/issues
repository: https://github.com/viash-hub/htrnaseq
viash_version: 0.9.0-RC6
viash_version: 0.9.0-RC7
config_mods: |
.requirements.commands := ['ps']

View File

@@ -54,14 +54,21 @@ argument_groups:
multiple: true
direction: output
default: './*'
- name: "--joblog"
type: file
description: Where to store the log file listing all the jobs.
required: false
direction: output
default: "execution_log.txt"
resources:
- type: bash_script
path: script.sh
# test_resources:
# - type: bash_script
# path: test.sh
test_resources:
- type: bash_script
path: test.sh
engines:
- type: docker
image: debian:stable-slim

View File

@@ -16,13 +16,13 @@ par_runThreadN=1
set -eo pipefail
# Check if wildcard character is present in output folder template
echo "Checking if output folder template contains a single wildcard character '*'."
printf "Checking if output folder template ($par_output) contains a single wildcard character '*'. "
output_glob_character="${par_output//[^\*]}"
if [[ "${#output_glob_character}" -ne "1" ]]; then
echo "The value for --output must contain exactly one '*' character."
echo "The value for --output must contain exactly one '*' character. Exiting..."
exit 1
else
echo "Wildcard character found!"
echo "Done, wildcard character found!"
fi
# Split the delimited strings into arrays
@@ -41,7 +41,7 @@ if [ ! "$num_barcodes" -eq "$num_r1_inputs" ] || [ ! "$num_r1_inputs" -eq "$num_
"should be the same, and their order should match."
exit 1
else
echo "Checked if length of barcodes input ($num_barcodes) is " \
echo "Checked if length of barcodes input ($num_barcodes) is "\
"the same as R1 reads ($num_r1_inputs) and R2 reads "\
"($num_r2_inputs). Seems OK!"
fi
@@ -84,6 +84,8 @@ function _run() {
local input_R2="$9"
local par_UMIstart=$(($par_wellBarcodeLength + 1))
set -eo pipefail
echo <<-EOF
Processing $barcode
For the following inputs (lanes):
@@ -96,19 +98,18 @@ function _run() {
# of the file).
echo "$barcode" > "$barcode.txt"
local dir="./${par_output//\*/$barcode}/"
local dir="${par_output//\*/$barcode}/"
echo "Setting output for barcode '$barcode' to '$dir'."
mkdir -p "$dir"
# check if files are compressed
TMPDIR=$(mktemp -d "$meta_temp_dir/parallel_map-$barcode-XXXXXX")
local TMPDIR=$(mktemp -d "$meta_temp_dir/parallel_map-$barcode-XXXXXX")
# Delete the directory named by $TMPDIR if it exists.
# Returns: the exit status of rm when the directory exists, 1 when it does not.
clean_up() {
  if [[ -d "$TMPDIR" ]]; then
    rm -r "$TMPDIR"
  else
    false
  fi
}
trap clean_up RETURN
# Decompress the input files when needed
echo "Checking for compressed fastq files (barcode $barcode)."
# NOTE: for some reason, using STAR's --readFilesCommand does not always work
# This might be because STAR creates fifo files (see https://man7.org/linux/man-pages/man7/fifo.7.html)
# and this requires a filesystem that supports this. Another cause might be that the input files
@@ -116,34 +117,54 @@ function _run() {
# always produced empty BAM files, but also a successful exit code (0) so the problem is not reported.
# However, the logs showed the following error: "gzip -: unexpected end of file".
# TODO: could turn this into a function
file "$input_R1" | grep -q 'gzip'
if [[ "${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R1 (barcode $barcode)"
# Report whether a file is gzip-compressed, judged by the type that `file` detects.
# Arguments: $1 - path of the file to inspect
# Globals:   barcode (read) - only used in the progress message
# Outputs:   a single progress line on stdout
# Returns:   0 when the detected type mentions 'gzip', 1 otherwise
function is_gzipped {
  # The path is passed as printf data, not as the format string, so a '%' or
  # '\' in a file name is printed verbatim instead of being interpreted.
  printf "Checking if input '%s' (barcode '%s') is gzipped... " "$1" "$barcode"
  # -b omits the "<path>:" prefix, so a path that merely contains "gzip" is
  # not mistaken for a compressed file. -L inspects the target of a symlink
  # instead of reporting "symbolic link to ...".
  if file -b -L -- "$1" | grep -q 'gzip'; then
    echo "Done, detected compressed file."
    return 0
  fi
  echo "Done, file does not need decompression."
  return 1
}
if is_gzipped $input_R1; then
local compressed_file_name_r1="$(basename -- $input_R1)"
local uncompressed_file_r1="$TMPDIR/$compressed_file_name_r1"
echo "Unpacking input to $uncompressed_file_r1"
local uncompressed_file_r1="$TMPDIR/${compressed_file_name_r1%.gz}"
printf "Unpacking input to $uncompressed_file_r1... "
zcat "$input_R1" > "$uncompressed_file_r1"
echo "Decompression done."
else
echo "$input_R1 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r1="$input_R1"
fi
file "$input_R2" | grep -q 'gzip'
if [[ "${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R2 (barcode $barcode)"
if is_gzipped $input_R2; then
local compressed_file_name_r2="$(basename -- $input_R2)"
local uncompressed_file_r2="$TMPDIR/$(basename -- $input_R2)"
echo "Unpacking input to $uncompressed_file_r2"
local uncompressed_file_r2="$TMPDIR/${compressed_file_name_r2%.gz}"
printf "Unpacking input to $uncompressed_file_r2... "
zcat "$input_R2" > "$uncompressed_file_r2"
echo "Decompression done."
else
echo "$input_R2 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r2="$input_R2"
fi
local n_input_lines_r1=$(wc -l < "$uncompressed_file_r1")
local n_input_lines_r2=$(wc -l < "$uncompressed_file_r2")
printf "Checking if length of input file mates match. "
if (( $n_input_lines_r1 != n_input_lines_r2 )); then
echo "The length of file $input_R1 ($n_input_lines_r1) does not match with $input_R2 ($n_input_lines_r2)"
return 1
else
echo "Seems OK, $n_input_lines_r1 input lines."
fi
echo "Starting STAR for barcode '$barcode'"
# soloType 'Droplet' is the same as 'CB_UMI_Simple': one UMI and one cell barcode of fixed length.
# By default in this mode, STAR will look for the cell barcode and the UMI in the last file specified with --readFilesIn.
# So we need to specify R2 first and R1 second, because R1 contains the barcode and UMI.
# Also, you might be tempted to use '--soloBarcodeMate 1' to alter this behavior, but this requires clipping
# the barcode from this mate by specifying --clip5pNbases and/or --clip3pNbases, which we do not want to do.
STAR \
--readFilesIn "$uncompressed_file_r1" "$uncompressed_file_r2" \
--readFilesIn "$uncompressed_file_r2" "$uncompressed_file_r1" \
--soloType Droplet \
--quantMode GeneCounts \
--genomeLoad LoadAndKeep \
@@ -165,8 +186,18 @@ function _run() {
--outSAMattributes NH HI nM AS CR UR CB UB GX GN \
--soloCBwhitelist "$barcode.txt" \
--outFileNamePrefix "$dir" \
--outTmpDir "$TMPDIR/STARtemp/" \
${read_command:+--readFilesCommand "$read_command"}
--outTmpDir "$TMPDIR/STARtemp/"
printf "Done running STAR. "
# Check if the number of processed reads is equal to the number of input reads
local n_input_reads=$(($n_input_lines_r1 / 4))
local nr_output_reads=$(grep -Po "Number\ of\ input\ reads \\|\W*\K\d+" "$dir/Log.final.out")
if (( $nr_output_reads != $n_input_reads )); then
echo "Not all input reads were processed for barcode $barcode."
return 1
else
echo "Processed $nr_output_reads reads for barcode $barcode".
fi
}
# Export the function - requires bash
@@ -181,9 +212,6 @@ STAR --genomeLoad LoadAndExit --genomeDir "$par_genomeDir"
# Make sure that parallel uses the correct shell
export PARALLEL_SHELL="/bin/bash"
# Location of the log file that will be created by parallel
log_file=execution_log.txt
# Some notes:
# --halt now,fail=1: instruct parallel to exit when a job has failed and kill remaining running jobs.
#
@@ -195,7 +223,7 @@ log_file=execution_log.txt
parallel_cmd=("parallel" "--jobs" "80%" "--verbose" "--memfree" "2G"
"--tmpdir" "$meta_temp_dir"
"--retry-failed" "--retries" "4" "--halt" "soon,fail=1"
"--joblog" "$log_file" "_run" "{}")
"--joblog" "$par_joblog" "_run" "{}")
# Arguments for which there is one value, so these will not create extra jobs
parallel_cmd+=(":::" "$par_wellBarcodesLength" ":::" "$par_umiLength" ":::" "$par_output" ":::" "$par_genomeDir" ":::" "$par_limitBAMsortRAM" ":::" "$par_runThreadN")
@@ -204,13 +232,16 @@ parallel_cmd+=(":::" "$par_wellBarcodesLength" ":::" "$par_umiLength" ":::" "$pa
# Thus, these argument lists should have the same length.
parallel_cmd+=(":::" "${barcodes[@]}" ":::+" "${input_r1[@]}" ":::+" "${input_r2[@]}")
set +eo pipefail
"${parallel_cmd[@]}"
exit_code=$?
echo "GNU parallel finished!"
# Unload reference
echo "Unloading reference genome"
printf "Unloading reference genome. "
STAR --genomeLoad Remove --genomeDir "$par_genomeDir"
echo "Done!"
# Exit code from GNU parallel:
# If fail=1 is used, the exit status will be the exit status of the failing job.
@@ -231,7 +262,7 @@ if ((exit_code>0)); then
HERE
)
printf $MESSAGE "$(<$log_file)"
printf "$MESSAGE" "$(<$par_joblog)"
exit 1
else
cat <<-HERE

356
src/parallel_map/test.sh Executable file
View File

@@ -0,0 +1,356 @@
set -eo pipefail
## VIASH START
meta_executable="target/executable/parallel_map/parallel_map"
## VIASH END
# Some helper functions
# Abort the test with exit code 1 unless the given path is an existing directory.
# Arguments: $1 - path that is expected to be a directory
# Outputs:   a failure message on stdout when the directory is missing
assert_directory_exists() {
  if [ ! -d "$1" ]; then
    # Say "Directory" here so a failure is not mistaken for a missing regular file.
    echo "Directory '$1' does not exist"
    exit 1
  fi
}
# Abort the test with exit code 1 unless the given path is an existing regular file.
# Arguments: $1 - path that is expected to be a regular file
# Outputs:   a failure message on stdout when the file is missing
assert_file_exists() {
  if [ -f "$1" ]; then
    return 0
  fi
  echo "File '$1' does not exist" && exit 1
}
# Abort the test with exit code 1 unless the file contains the given text.
# The text is matched literally; use assert_file_contains_regex for patterns.
# Arguments: $1 - file to search
#            $2 - literal text that must occur on some line of the file
# Outputs:   a failure message on stdout when the text is not found
assert_file_contains() {
  # -F disables regex interpretation of the needle. '--' keeps a needle that
  # starts with '-' from being parsed as a grep option.
  if ! grep -q -F -- "$2" "$1"; then
    echo "File '$1' does not contain '$2'"
    exit 1
  fi
}
# Abort the test with exit code 1 unless some line of the file matches the
# given extended regular expression.
# Arguments: $1 - file to search
#            $2 - extended regular expression (grep -E syntax)
# Outputs:   a failure message on stdout when nothing matches
assert_file_contains_regex() {
  if grep -q -E "$2" "$1"; then
    return 0
  fi
  echo "File '$1' does not contain '$2'" && exit 1
}
echo "> Prepare test data in $meta_temp_dir"
TMPDIR=$(mktemp -d --tmpdir="$meta_temp_dir")
# Delete the scratch directory named by $TMPDIR if it exists.
# Returns: the exit status of rm when the directory exists, 1 when it does not.
clean_up() {
  if [[ -d "$TMPDIR" ]]; then
    rm -r "$TMPDIR"
  else
    false
  fi
}
trap clean_up EXIT
# Sample 1, barcode ACAGTCACAG, UMI CTACGGATGA
cat > "$TMPDIR/sample1_R1.fastq" <<'EOF'
@SAMPLE_1_SEQ_ID1
ACAGTCACAGCTACGGATGAGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_1_SEQ_ID2
ACAGTCACAGCTACGGATGAGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAG
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
cat > "$TMPDIR/sample1_R2.fastq" <<'EOF'
@SAMPLE_1_SEQ_ID1
CTCACAGAGAGTCACAACATAGGCGCGGATGTGTGAGGCTTATGAGGC
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_1_SEQ_ID2
CTCACAGAGAGTCACAACATAGGCGCGGATGTGTGAGGCTTATGAGGC
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
# Sample 2, barcode CGGGTTTACC, UMI GCTAGCTAGC
cat > "$TMPDIR/sample2_R1.fastq" << 'EOF'
@SAMPLE_2_SEQ_ID1
CGGGTTTACCGCTAGCTAGCCACCACTATGGTTGGCCGGTTAGTAGTGT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_2_SEQ_ID2
CGGGTTTACCGCTAGCTAGCCACCACTATGGTTGGCCGGTTAGTAGTGT
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
cat > "$TMPDIR/sample2_R2.fastq" <<'EOF'
@SAMPLE_2_SEQ_ID1
ACACTACTAACCGGCCAACCATAGTGGTG
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIII
@SAMPLE_2_SEQ_ID2
ACACTACTAACCGGCCAACCATAGTGGTG
+
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
EOF
# Note that there is a sjdbGTFchrPrefix argument for STAR:
# prefix for chromosome names in a GTF file (default: '-')
cat > "$TMPDIR/genome.fasta" <<'EOF'
>1
TGGCATGAGCCAACGAACGCTGCCTCATAAGCCTCACACATCCGCGCCTATGTTGTGACTCTCTGTGAGCGTTCGTGGG
GCTCGTCACCACTATGGTTGGCCGGTTAGTAGTGTGACTCCTGGTTTTCTGGAGCTTCTTTAAACCGTAGTCCAGTCAA
TGCGAATGGCACTTCACGACGGACTGTCCTTAGCTCAGGGGA
EOF
cat > "$TMPDIR/genes.gtf" <<'EOF'
1 example_source gene 0 72 . + . gene_id "gene1"; gene_name: "GENE1;
1 example_source exon 20 71 . + . gene_id "gene1"; gene_name: "GENE1"; exon_id: gene1_exon1;
1 example_source gene 80 160 . + . gene_id "gene2"; gene_name: "GENE2;
1 example_source exon 80 159 . + . gene_id "gene2"; gene_name: "GENE2"; exon_id: gene2_exon1;
EOF
echo "> Generate index"
STAR \
${meta_cpus:+--runThreadN $meta_cpus} \
--runMode genomeGenerate \
--genomeDir "$TMPDIR/index/" \
--genomeFastaFiles "$TMPDIR/genome.fasta" \
--sjdbGTFfile "$TMPDIR/genes.gtf" \
--genomeSAindexNbases 2 > /dev/null 2>&1
echo "> Run test 1"
run_1_dir="$TMPDIR/run_1"
mkdir -p "$run_1_dir"
# Run the component on the uncompressed inputs from inside the run directory.
# The output of pushd/popd and of the component is discarded so that only the
# test's own progress messages are printed.
pushd "$run_1_dir" > /dev/null
"$meta_executable" \
  --input_r1 "$TMPDIR/sample1_R1.fastq;$TMPDIR/sample2_R1.fastq" \
  --input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
  --genomeDir "$TMPDIR/index/" \
  --barcodes "ACAGTCACAG;CGGGTTTACC" \
  --wellBarcodesLength 10 \
  --umiLength 10 \
  --runThreadN 2 \
  --output "$TMPDIR/output_*" > /dev/null 2>&1
popd > /dev/null
echo ">> Check if output directories exists"
sample1_out="$TMPDIR/output_ACAGTCACAG"
sample2_out="$TMPDIR/output_CGGGTTTACC"
assert_directory_exists "$sample1_out"
assert_directory_exists "$sample2_out"
echo ">> Check if output files have been created"
for sample in "$sample1_out" "$sample2_out"; do
assert_file_exists "$sample/Aligned.sortedByCoord.out.bam"
assert_file_exists "$sample/Unmapped.out.mate1"
assert_file_exists "$sample/Unmapped.out.mate2"
assert_file_exists "$sample/Log.out"
assert_file_exists "$sample/Log.final.out"
assert_file_exists "$sample/ReadsPerGene.out.tab"
done
echo ">> Check if Solo output is present"
# Check the STARsolo output of every sample. The paths are built from the
# loop variable so that each sample's directory is inspected, not only the
# first one.
for sample in "$sample1_out" "$sample2_out"; do
  assert_directory_exists "$sample/Solo.out"
  assert_directory_exists "$sample/Solo.out/Gene"
  assert_file_exists "$sample/Solo.out/Barcodes.stats"
  assert_file_exists "$sample/Solo.out/Gene/raw/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/matrix.mtx"
  assert_file_exists "$sample/Solo.out/Gene/filtered/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/matrix.mtx"
done
echo ">> Check contents of output"
echo ">>> Sample 1"
assert_file_contains "$sample1_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample1_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample1_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
ACAGTCACAG
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
1 1 1
EOF
echo ">>> Sample 2"
assert_file_contains "$sample2_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample2_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample2_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
CGGGTTTACC
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
2 1 1
EOF
echo "> Run test 2 (compressed input)"
gzip -c "$TMPDIR/sample1_R1.fastq" > "$TMPDIR/sample1_R1.fastq.gz"
gzip -c "$TMPDIR/sample2_R1.fastq" > "$TMPDIR/sample2_R1.fastq.gz"
gzip -c "$TMPDIR/sample1_R2.fastq" > "$TMPDIR/sample1_R2.fastq.gz"
gzip -c "$TMPDIR/sample2_R2.fastq" > "$TMPDIR/sample2_R2.fastq.gz"
run_2_dir="$TMPDIR/run_2"
mkdir -p "$run_2_dir"
pushd "$run_2_dir" > /dev/null
"$meta_executable" \
--input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
--input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
--genomeDir "$TMPDIR/index/" \
--barcodes "ACAGTCACAG;CGGGTTTACC" \
--wellBarcodesLength 10 \
--umiLength 10 \
--runThreadN 2 \
--output "$TMPDIR/output_gz_*" > /dev/null 2>&1
popd > /dev/null
echo ">> Check if output directories exists"
sample1_out="$TMPDIR/output_gz_ACAGTCACAG"
sample2_out="$TMPDIR/output_gz_CGGGTTTACC"
assert_directory_exists "$sample1_out"
assert_directory_exists "$sample2_out"
echo ">> Check if output files have been created"
for sample in "$sample1_out" "$sample2_out"; do
assert_file_exists "$sample/Aligned.sortedByCoord.out.bam"
assert_file_exists "$sample/Unmapped.out.mate1"
assert_file_exists "$sample/Unmapped.out.mate2"
assert_file_exists "$sample/Log.out"
assert_file_exists "$sample/Log.final.out"
assert_file_exists "$sample/ReadsPerGene.out.tab"
done
echo ">> Check if Solo output is present"
# Check the STARsolo output of every sample from the compressed-input run.
# The paths are built from the loop variable so that each sample's directory
# is inspected, not only the first one.
for sample in "$sample1_out" "$sample2_out"; do
  assert_directory_exists "$sample/Solo.out"
  assert_directory_exists "$sample/Solo.out/Gene"
  assert_file_exists "$sample/Solo.out/Barcodes.stats"
  assert_file_exists "$sample/Solo.out/Gene/raw/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/raw/matrix.mtx"
  assert_file_exists "$sample/Solo.out/Gene/filtered/barcodes.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/features.tsv"
  assert_file_exists "$sample/Solo.out/Gene/filtered/matrix.mtx"
done
echo ">> Check contents of output"
echo ">>> Sample 1"
assert_file_contains "$sample1_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample1_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample1_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
ACAGTCACAG
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample1_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
1 1 1
EOF
echo ">>> Sample 2"
assert_file_contains "$sample2_out/Solo.out/Barcodes.stats" "yesWLmatchExact 2"
assert_file_contains "$sample2_out/Log.final.out" "Uniquely mapped reads number | 2"
assert_file_contains "$sample2_out/Log.final.out" "Number of input reads | 2"
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/barcodes.tsv" || { echo "Barcodes file is different"; exit 1; }
CGGGTTTACC
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/features.tsv" || { echo "Features file is different"; exit 1; }
gene1 gene1 Gene Expression
gene2 gene2 Gene Expression
EOF
cat << EOF | cmp -s "$sample2_out/Solo.out/Gene/filtered/matrix.mtx" || { echo "Matrix file is different"; exit 1; }
%%MatrixMarket matrix coordinate integer general
%
2 1 1
2 1 1
EOF
echo "> Check that wrong number of barcodes are detected."
run_3_dir="$TMPDIR/run_3"
mkdir -p "$run_3_dir"
pushd "$run_3_dir" > /dev/null
set +eo pipefail
"$meta_executable" \
--input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
--input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
--genomeDir "$TMPDIR/index/" \
--barcodes "ACAGTCACAG" \
--wellBarcodesLength 10 \
--umiLength 10 \
--runThreadN 2 \
--output "$TMPDIR/output_gz_*" > /dev/null 2>&1 && echo "Expected non-zero exit code " && exit 1
set -eo pipefail
popd > /dev/null
echo "> Check that missing wildcard character is detected."
run_4_dir="$TMPDIR/run_4"
mkdir -p "$run_4_dir"
pushd "$run_4_dir" > /dev/null
set +eo pipefail
"$meta_executable" \
--input_r1 "$TMPDIR/sample1_R1.fastq.gz;$TMPDIR/sample2_R1.fastq.gz" \
--input_r2 "$TMPDIR/sample1_R2.fastq.gz;$TMPDIR/sample2_R2.fastq.gz" \
--genomeDir "$TMPDIR/index/" \
--barcodes "ACAGTCACAG;CGGGTTTACC" \
--wellBarcodesLength 10 \
--umiLength 10 \
--runThreadN 2 \
--output "$TMPDIR/output_run4" > /dev/null 2>&1 && echo "Expected non-zero exit code." && exit 1
set -eo pipefail
popd > /dev/null
echo "> Check that a mismatch in the length of the input mates is detected."
empty_input_file="$TMPDIR/empty.fastq"
touch "$empty_input_file"
run_5_dir="$TMPDIR/run_5"
mkdir -p "$run_5_dir"
pushd "$run_5_dir" > /dev/null
set +eo pipefail
"$meta_executable" \
--input_r1 "$TMPDIR/sample1_R1.fastq;$empty_input_file" \
--input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
--genomeDir "$TMPDIR/index/" \
--barcodes "ACAGTCACAG;CGGGTTTACC" \
--wellBarcodesLength 10 \
--umiLength 10 \
--runThreadN 2 \
--output "$TMPDIR/output_run5_*" > /dev/null 2>&1 && echo "Expected non-zero exit code " && exit 1
set -eo pipefail
popd > /dev/null
echo "> Check that wrong number of input files is detected."
run_6_dir="$TMPDIR/run_6"
mkdir -p "$run_6_dir"
pushd "$run_6_dir" > /dev/null
set +eo pipefail
"$meta_executable" \
--input_r1 "$TMPDIR/sample1_R1.fastq" \
--input_r2 "$TMPDIR/sample1_R2.fastq;$TMPDIR/sample2_R2.fastq" \
--genomeDir "$TMPDIR/index/" \
--barcodes "ACAGTCACAG;CGGGTTTACC" \
--wellBarcodesLength 10 \
--umiLength 10 \
--runThreadN 2 \
--output "$TMPDIR/output_run_6_*" > /dev/null 2>&1 && echo "Expected non-zero exit code " && exit 1
set -eo pipefail
popd > /dev/null

View File

@@ -94,12 +94,28 @@ argument_groups:
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--joblog"
description: "Where to store the log file listing all the jobs."
info: null
default:
- "execution_log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
@@ -215,7 +231,7 @@ build_info:
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -219,6 +219,11 @@ function ViashHelp {
echo " Location of the output folders, 1 folder per barcode. The value used"
echo " for this argument must contain a '*', which will be replaced with the"
echo " barcode to form the final output location for that barcode."
echo ""
echo " --joblog"
echo " type: file, output, file must exist"
echo " default: execution_log.txt"
echo " Where to store the log file listing all the jobs."
}
# initialise variables
@@ -510,9 +515,9 @@ RUN wget -O $STAR_TARGET $STAR_SOURCE && \
rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
LABEL org.opencontainers.image.created="2024-07-31T17:23:54Z"
LABEL org.opencontainers.image.created="2024-08-20T08:47:42Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
LABEL org.opencontainers.image.revision="89142447a27bbb6c7ebe6212f093176ce8b42934"
LABEL org.opencontainers.image.version="main_pipeline"
VIASHDOCKER
@@ -756,6 +761,17 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
shift 1
;;
--joblog)
[ -n "$VIASH_PAR_JOBLOG" ] && ViashError Bad arguments for option \'--joblog\': \'$VIASH_PAR_JOBLOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_JOBLOG="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --joblog. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--joblog=*)
[ -n "$VIASH_PAR_JOBLOG" ] && ViashError Bad arguments for option \'--joblog=*\': \'$VIASH_PAR_JOBLOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_JOBLOG=$(ViashRemoveFlags "$1")
shift 1
;;
---engine)
VIASH_ENGINE_ID="$2"
shift 2
@@ -972,6 +988,9 @@ fi
if [ -z ${VIASH_PAR_RUNTHREADN+x} ]; then
VIASH_PAR_RUNTHREADN="1"
fi
if [ -z ${VIASH_PAR_JOBLOG+x} ]; then
VIASH_PAR_JOBLOG="execution_log.txt"
fi
# check whether required files exist
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
@@ -1105,6 +1124,9 @@ fi
if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
fi
if [ ! -z "$VIASH_PAR_JOBLOG" ] && [ ! -d "$(dirname "$VIASH_PAR_JOBLOG")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_JOBLOG")"
fi
if [ "$VIASH_ENGINE_ID" == "native" ] ; then
if [ "$VIASH_MODE" == "run" ]; then
@@ -1156,6 +1178,11 @@ if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
done
VIASH_PAR_OUTPUT=$(IFS=';' ; echo "${VIASH_TEST_OUTPUT[*]}")
fi
if [ ! -z "$VIASH_PAR_JOBLOG" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_JOBLOG")" )
VIASH_PAR_JOBLOG=$(ViashDockerAutodetectMount "$VIASH_PAR_JOBLOG")
VIASH_CHOWN_VARS+=( "$VIASH_PAR_JOBLOG" )
fi
if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR")
@@ -1236,6 +1263,7 @@ $( if [ ! -z ${VIASH_PAR_UMILENGTH+x} ]; then echo "${VIASH_PAR_UMILENGTH}" | se
$( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "${VIASH_PAR_LIMITBAMSORTRAM}" | sed "s#'#'\"'\"'#g;s#.*#par_limitBAMsortRAM='&'#" ; else echo "# par_limitBAMsortRAM="; fi )
$( if [ ! -z ${VIASH_PAR_RUNTHREADN+x} ]; then echo "${VIASH_PAR_RUNTHREADN}" | sed "s#'#'\"'\"'#g;s#.*#par_runThreadN='&'#" ; else echo "# par_runThreadN="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_JOBLOG+x} ]; then echo "${VIASH_PAR_JOBLOG}" | sed "s#'#'\"'\"'#g;s#.*#par_joblog='&'#" ; else echo "# par_joblog="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -1260,13 +1288,13 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
set -eo pipefail
# Check if wildcard character is present in output folder template
echo "Checking if output folder template contains a single wildcard character '*'."
printf "Checking if output folder template (\$par_output) contains a single wildcard character '*'. "
output_glob_character="\${par_output//[^\\*]}"
if [[ "\${#output_glob_character}" -ne "1" ]]; then
echo "The value for --output must contain exactly one '*' character."
echo "The value for --output must contain exactly one '*' character. Exiting..."
exit 1
else
echo "Wildcard character found!"
echo "Done, wildcard character found!"
fi
# Split the delimited strings into arrays
@@ -1285,7 +1313,7 @@ if [ ! "\$num_barcodes" -eq "\$num_r1_inputs" ] || [ ! "\$num_r1_inputs" -eq "\$
"should be the same, and their order should match."
exit 1
else
echo "Checked if length of barcodes input (\$num_barcodes) is " \\
echo "Checked if length of barcodes input (\$num_barcodes) is "\\
"the same as R1 reads (\$num_r1_inputs) and R2 reads "\\
"(\$num_r2_inputs). Seems OK!"
fi
@@ -1328,6 +1356,8 @@ function _run() {
local input_R2="\$9"
local par_UMIstart=\$((\$par_wellBarcodeLength + 1))
set -eo pipefail
echo <<-EOF
Processing \$barcode
For the following inputs (lanes):
@@ -1340,19 +1370,18 @@ function _run() {
# of the file).
echo "\$barcode" > "\$barcode.txt"
local dir="./\${par_output//\\*/\$barcode}/"
local dir="\${par_output//\\*/\$barcode}/"
echo "Setting output for barcode '\$barcode' to '\$dir'."
mkdir -p "\$dir"
# check if files are compressed
TMPDIR=\$(mktemp -d "\$meta_temp_dir/parallel_map-\$barcode-XXXXXX")
local TMPDIR=\$(mktemp -d "\$meta_temp_dir/parallel_map-\$barcode-XXXXXX")
function clean_up {
[[ -d "\$TMPDIR" ]] && rm -r "\$TMPDIR"
}
trap clean_up RETURN
# Decompress the input files when needed
echo "Checking for compressed fastq files (barcode \$barcode)."
# NOTE: for some reason, using STAR's --readFilesCommand does not always work
# This might be because STAR creates fifo files (see https://man7.org/linux/man-pages/man7/fifo.7.html)
# and this requires a filesystem that supports this. Another cause might be that the input files
@@ -1360,34 +1389,54 @@ function _run() {
# always produced empty BAM files, but also a succesfull exit code (0) so the problem is not reported.
# However, the logs showed the following error: "gzip -: unexpected end of file".
# TODO: could turn this into a function
file "\$input_R1" | grep -q 'gzip'
if [[ "\${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R1 (barcode \$barcode)"
function is_gzipped {
printf "Checking if input '\$1' (barcode '\$barcode') is gzipped... "
if file "\$1" | grep -q 'gzip'; then
echo "Done, detected compressed file."
return
fi
echo "Done, file does not need decompression."
false
}
if is_gzipped \$input_R1; then
local compressed_file_name_r1="\$(basename -- \$input_R1)"
local uncompressed_file_r1="\$TMPDIR/\$compressed_file_name_r1"
echo "Unpacking input to \$uncompressed_file_r1"
local uncompressed_file_r1="\$TMPDIR/\${compressed_file_name_r1%.gz}"
printf "Unpacking input to \$uncompressed_file_r1... "
zcat "\$input_R1" > "\$uncompressed_file_r1"
echo "Decompression done."
else
echo "\$input_R1 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r1="\$input_R1"
fi
file "\$input_R2" | grep -q 'gzip'
if [[ "\${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R2 (barcode \$barcode)"
if is_gzipped \$input_R2; then
local compressed_file_name_r2="\$(basename -- \$input_R2)"
local uncompressed_file_r2="\$TMPDIR/\$(basename -- \$input_R2)"
echo "Unpacking input to \$uncompressed_file_r2"
local uncompressed_file_r2="\$TMPDIR/\${compressed_file_name_r2%.gz}"
printf "Unpacking input to \$uncompressed_file_r2... "
zcat "\$input_R2" > "\$uncompressed_file_r2"
echo "Decompression done."
else
echo "\$input_R2 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r2="\$input_R2"
fi
local n_input_lines_r1=\$(wc -l < "\$uncompressed_file_r1")
local n_input_lines_r2=\$(wc -l < "\$uncompressed_file_r2")
printf "Checking if length of input file mates match. "
if (( \$n_input_lines_r1 != n_input_lines_r2 )); then
echo "The length of file \$input_R1 (\$n_input_lines_r1) does not match with \$input_R2 (\$n_input_lines_r2)"
return 1
else
echo "Seems OK, \$n_input_lines_r1 input lines."
fi
echo "Starting STAR for barcode '\$barcode'"
# soloType 'Droplet' is the same as 'CB_UMI_Simple': one UMI and one cell barcode of fixed length.
# By default in this mode, STAR will look for the cell barcode and the UMI int the last files specified with --readFilesIn
# So we need to specify R2 first and R1 second, because R1 contains the barcode and UMI.
# Also, you might be tempted to use '--soloBarcodeMate 1' to alter this behavior, but this requires the clipping
# the barcode from this mate by specifying --clip5pNbases and/or --clip3pNbases, which we do not want to do.
STAR \\
--readFilesIn "\$uncompressed_file_r1" "\$uncompressed_file_r2" \\
--readFilesIn "\$uncompressed_file_r2" "\$uncompressed_file_r1" \\
--soloType Droplet \\
--quantMode GeneCounts \\
--genomeLoad LoadAndKeep \\
@@ -1409,8 +1458,18 @@ function _run() {
--outSAMattributes NH HI nM AS CR UR CB UB GX GN \\
--soloCBwhitelist "\$barcode.txt" \\
--outFileNamePrefix "\$dir" \\
--outTmpDir "\$TMPDIR/STARtemp/" \\
\${read_command:+--readFilesCommand "\$read_command"}
--outTmpDir "\$TMPDIR/STARtemp/"
printf "Done running STAR. "
# Check if the number of processed reads is equal to the number of input reads
local n_input_reads=\$((\$n_input_lines_r1 / 4))
local nr_output_reads=\$(grep -Po "Number\\ of\\ input\\ reads \\\\|\\W*\\K\\d+" "\$dir/Log.final.out")
if (( \$nr_output_reads != \$n_input_reads )); then
echo "Not all input reads were processed for barcode \$barcode."
return 1
else
echo "Processed \$nr_output_reads reads for barcode \$barcode".
fi
}
# Export the function - requires bash
@@ -1425,9 +1484,6 @@ STAR --genomeLoad LoadAndExit --genomeDir "\$par_genomeDir"
# Make sure that parallel uses the correct shell
export PARALLEL_SHELL="/bin/bash"
# Location of the log file that will be created by parallel
log_file=execution_log.txt
# Some notes:
# --halt now,fail=1: instruct parallel to exit when a job has failed and kill remaining running jobs.
#
@@ -1439,7 +1495,7 @@ log_file=execution_log.txt
parallel_cmd=("parallel" "--jobs" "80%" "--verbose" "--memfree" "2G"
"--tmpdir" "\$meta_temp_dir"
"--retry-failed" "--retries" "4" "--halt" "soon,fail=1"
"--joblog" "\$log_file" "_run" "{}")
"--joblog" "\$par_joblog" "_run" "{}")
# Arguments for which there is one value, so these will not create extra jobs
parallel_cmd+=(":::" "\$par_wellBarcodesLength" ":::" "\$par_umiLength" ":::" "\$par_output" ":::" "\$par_genomeDir" ":::" "\$par_limitBAMsortRAM" ":::" "\$par_runThreadN")
@@ -1448,13 +1504,16 @@ parallel_cmd+=(":::" "\$par_wellBarcodesLength" ":::" "\$par_umiLength" ":::" "\
# Thus, these argument lists should have the same length.
parallel_cmd+=(":::" "\${barcodes[@]}" ":::+" "\${input_r1[@]}" ":::+" "\${input_r2[@]}")
set +eo pipefail
"\${parallel_cmd[@]}"
exit_code=\$?
echo "GNU parallel finished!"
# Unload reference
echo "Unloading reference genome"
printf "Unloading reference genome. "
STAR --genomeLoad Remove --genomeDir "\$par_genomeDir"
echo "Done!"
# Exit code from GNU parallel:
# If fail=1 is used, the exit status will be the exit status of the failing job.
@@ -1475,7 +1534,7 @@ if ((exit_code>0)); then
HERE
)
printf \$MESSAGE "\$(<\$log_file)"
printf "\$MESSAGE" "\$(<\$par_joblog)"
exit 1
else
cat <<-HERE
@@ -1529,6 +1588,9 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT")
fi
if [ ! -z "$VIASH_PAR_JOBLOG" ]; then
VIASH_PAR_JOBLOG=$(ViashDockerStripAutomount "$VIASH_PAR_JOBLOG")
fi
if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR")
fi
@@ -1549,6 +1611,10 @@ if [ ! -z "$VIASH_PAR_OUTPUT" ] && ! compgen -G "$VIASH_PAR_OUTPUT" > /dev/null;
ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
exit 1
fi
if [ ! -z "$VIASH_PAR_JOBLOG" ] && [ ! -e "$VIASH_PAR_JOBLOG" ]; then
ViashError "Output file '$VIASH_PAR_JOBLOG' does not exist."
exit 1
fi
exit 0

View File

@@ -94,12 +94,28 @@ argument_groups:
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--joblog"
description: "Where to store the log file listing all the jobs."
info: null
default:
- "execution_log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference\
\ (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
@@ -215,7 +231,7 @@ build_info:
output: "target/nextflow/parallel_map"
executable: "target/nextflow/parallel_map/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -2893,6 +2893,20 @@ meta = [
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--joblog",
"description" : "Where to store the log file listing all the jobs.",
"default" : [
"execution_log.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
@@ -2905,6 +2919,13 @@ meta = [
}
],
"description" : "Map wells in batch, using STAR\nSpliced Transcripts Alignment to a Reference (C) Alexander Dobin\nhttps://github.com/alexdobin/STAR\n",
"test_resources" : [
{
"type" : "bash_script",
"path" : "test.sh",
"is_executable" : true
}
],
"status" : "enabled",
"requirements" : {
"commands" : [
@@ -3038,7 +3059,7 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/parallel_map",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3094,6 +3115,7 @@ $( if [ ! -z ${VIASH_PAR_UMILENGTH+x} ]; then echo "${VIASH_PAR_UMILENGTH}" | se
$( if [ ! -z ${VIASH_PAR_LIMITBAMSORTRAM+x} ]; then echo "${VIASH_PAR_LIMITBAMSORTRAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limitBAMsortRAM='&'#" ; else echo "# par_limitBAMsortRAM="; fi )
$( if [ ! -z ${VIASH_PAR_RUNTHREADN+x} ]; then echo "${VIASH_PAR_RUNTHREADN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_runThreadN='&'#" ; else echo "# par_runThreadN="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_JOBLOG+x} ]; then echo "${VIASH_PAR_JOBLOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_joblog='&'#" ; else echo "# par_joblog="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3118,13 +3140,13 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
set -eo pipefail
# Check if wildcard character is present in output folder template
echo "Checking if output folder template contains a single wildcard character '*'."
printf "Checking if output folder template (\\$par_output) contains a single wildcard character '*'. "
output_glob_character="\\${par_output//[^\\\\*]}"
if [[ "\\${#output_glob_character}" -ne "1" ]]; then
echo "The value for --output must contain exactly one '*' character."
echo "The value for --output must contain exactly one '*' character. Exiting..."
exit 1
else
echo "Wildcard character found!"
echo "Done, wildcard character found!"
fi
# Split the delimited strings into arrays
@@ -3143,7 +3165,7 @@ if [ ! "\\$num_barcodes" -eq "\\$num_r1_inputs" ] || [ ! "\\$num_r1_inputs" -eq
"should be the same, and their order should match."
exit 1
else
echo "Checked if length of barcodes input (\\$num_barcodes) is " \\\\
echo "Checked if length of barcodes input (\\$num_barcodes) is "\\\\
"the same as R1 reads (\\$num_r1_inputs) and R2 reads "\\\\
"(\\$num_r2_inputs). Seems OK!"
fi
@@ -3186,6 +3208,8 @@ function _run() {
local input_R2="\\$9"
local par_UMIstart=\\$((\\$par_wellBarcodeLength + 1))
set -eo pipefail
echo <<-EOF
Processing \\$barcode
For the following inputs (lanes):
@@ -3198,19 +3222,18 @@ function _run() {
# of the file).
echo "\\$barcode" > "\\$barcode.txt"
local dir="./\\${par_output//\\\\*/\\$barcode}/"
local dir="\\${par_output//\\\\*/\\$barcode}/"
echo "Setting output for barcode '\\$barcode' to '\\$dir'."
mkdir -p "\\$dir"
# check if files are compressed
TMPDIR=\\$(mktemp -d "\\$meta_temp_dir/parallel_map-\\$barcode-XXXXXX")
local TMPDIR=\\$(mktemp -d "\\$meta_temp_dir/parallel_map-\\$barcode-XXXXXX")
function clean_up {
[[ -d "\\$TMPDIR" ]] && rm -r "\\$TMPDIR"
}
trap clean_up RETURN
# Decompress the input files when needed
echo "Checking for compressed fastq files (barcode \\$barcode)."
# NOTE: for some reason, using STAR's --readFilesCommand does not always work
# This might be because STAR creates fifo files (see https://man7.org/linux/man-pages/man7/fifo.7.html)
# and this requires a filesystem that supports this. Another cause might be that the input files
@@ -3218,34 +3241,54 @@ function _run() {
# always produced empty BAM files, but also a successful exit code (0) so the problem is not reported.
# However, the logs showed the following error: "gzip -: unexpected end of file".
# TODO: could turn this into a function
file "\\$input_R1" | grep -q 'gzip'
if [[ "\\${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R1 (barcode \\$barcode)"
# Report whether the file passed as the first argument looks gzip-compressed.
# Arguments: first positional argument - path of the file to inspect.
# Globals:   reads the barcode variable from the enclosing scope (log message only).
# Outputs:   a progress message on stdout, completed with the verdict.
# Returns:   0 when the output of file contains the string gzip, non-zero otherwise.
# NOTE(review): grep matches anywhere in the output of file, which presumably
# includes the path itself - confirm a path containing gzip cannot give a false positive.
function is_gzipped {
printf "Checking if input '\\$1' (barcode '\\$barcode') is gzipped... "
if file "\\$1" | grep -q 'gzip'; then
echo "Done, detected compressed file."
# A bare return hands back the status of the last executed command (the echo above).
return
fi
echo "Done, file does not need decompression."
# false is the last command run, so the function ends with a non-zero status.
false
}
if is_gzipped \\$input_R1; then
local compressed_file_name_r1="\\$(basename -- \\$input_R1)"
local uncompressed_file_r1="\\$TMPDIR/\\$compressed_file_name_r1"
echo "Unpacking input to \\$uncompressed_file_r1"
local uncompressed_file_r1="\\$TMPDIR/\\${compressed_file_name_r1%.gz}"
printf "Unpacking input to \\$uncompressed_file_r1... "
zcat "\\$input_R1" > "\\$uncompressed_file_r1"
echo "Decompression done."
else
echo "\\$input_R1 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r1="\\$input_R1"
fi
file "\\$input_R2" | grep -q 'gzip'
if [[ "\\${PIPESTATUS[1]}" -ne 0 ]]; then
echo "Detected compressed input files for R2 (barcode \\$barcode)"
if is_gzipped \\$input_R2; then
local compressed_file_name_r2="\\$(basename -- \\$input_R2)"
local uncompressed_file_r2="\\$TMPDIR/\\$(basename -- \\$input_R2)"
echo "Unpacking input to \\$uncompressed_file_r2"
local uncompressed_file_r2="\\$TMPDIR/\\${compressed_file_name_r2%.gz}"
printf "Unpacking input to \\$uncompressed_file_r2... "
zcat "\\$input_R2" > "\\$uncompressed_file_r2"
echo "Decompression done."
else
echo "\\$input_R2 is not gzip compressed, assuming to be uncompressed."
local uncompressed_file_r2="\\$input_R2"
fi
local n_input_lines_r1=\\$(wc -l < "\\$uncompressed_file_r1")
local n_input_lines_r2=\\$(wc -l < "\\$uncompressed_file_r2")
printf "Checking if length of input file mates match. "
if (( \\$n_input_lines_r1 != n_input_lines_r2 )); then
echo "The length of file \\$input_R1 (\\$n_input_lines_r1) does not match with \\$input_R2 (\\$n_input_lines_r2)"
return 1
else
echo "Seems OK, \\$n_input_lines_r1 input lines."
fi
echo "Starting STAR for barcode '\\$barcode'"
# soloType 'Droplet' is the same as 'CB_UMI_Simple': one UMI and one cell barcode of fixed length.
# By default in this mode, STAR will look for the cell barcode and the UMI in the last files specified with --readFilesIn
# So we need to specify R2 first and R1 second, because R1 contains the barcode and UMI.
# Also, you might be tempted to use '--soloBarcodeMate 1' to alter this behavior, but this requires clipping
# the barcode from this mate by specifying --clip5pNbases and/or --clip3pNbases, which we do not want to do.
STAR \\\\
--readFilesIn "\\$uncompressed_file_r1" "\\$uncompressed_file_r2" \\\\
--readFilesIn "\\$uncompressed_file_r2" "\\$uncompressed_file_r1" \\\\
--soloType Droplet \\\\
--quantMode GeneCounts \\\\
--genomeLoad LoadAndKeep \\\\
@@ -3267,8 +3310,18 @@ function _run() {
--outSAMattributes NH HI nM AS CR UR CB UB GX GN \\\\
--soloCBwhitelist "\\$barcode.txt" \\\\
--outFileNamePrefix "\\$dir" \\\\
--outTmpDir "\\$TMPDIR/STARtemp/" \\\\
\\${read_command:+--readFilesCommand "\\$read_command"}
--outTmpDir "\\$TMPDIR/STARtemp/"
printf "Done running STAR. "
# Check if the number of processed reads is equal to the number of input reads
local n_input_reads=\\$((\\$n_input_lines_r1 / 4))
local nr_output_reads=\\$(grep -Po "Number\\\\ of\\\\ input\\\\ reads \\\\\\\\|\\\\W*\\\\K\\\\d+" "\\$dir/Log.final.out")
if (( \\$nr_output_reads != \\$n_input_reads )); then
echo "Not all input reads were processed for barcode \\$barcode."
return 1
else
echo "Processed \\$nr_output_reads reads for barcode \\$barcode".
fi
}
# Export the function - requires bash
@@ -3283,9 +3336,6 @@ STAR --genomeLoad LoadAndExit --genomeDir "\\$par_genomeDir"
# Make sure that parallel uses the correct shell
export PARALLEL_SHELL="/bin/bash"
# Location of the log file that will be created by parallel
log_file=execution_log.txt
# Some notes:
# --halt now,fail=1: instruct parallel to exit when a job has failed and kill remaining running jobs.
#
@@ -3297,7 +3347,7 @@ log_file=execution_log.txt
parallel_cmd=("parallel" "--jobs" "80%" "--verbose" "--memfree" "2G"
"--tmpdir" "\\$meta_temp_dir"
"--retry-failed" "--retries" "4" "--halt" "soon,fail=1"
"--joblog" "\\$log_file" "_run" "{}")
"--joblog" "\\$par_joblog" "_run" "{}")
# Arguments for which there is one value, so these will not create extra jobs
parallel_cmd+=(":::" "\\$par_wellBarcodesLength" ":::" "\\$par_umiLength" ":::" "\\$par_output" ":::" "\\$par_genomeDir" ":::" "\\$par_limitBAMsortRAM" ":::" "\\$par_runThreadN")
@@ -3306,13 +3356,16 @@ parallel_cmd+=(":::" "\\$par_wellBarcodesLength" ":::" "\\$par_umiLength" ":::"
# Thus, these argument lists should have the same length.
parallel_cmd+=(":::" "\\${barcodes[@]}" ":::+" "\\${input_r1[@]}" ":::+" "\\${input_r2[@]}")
set +eo pipefail
"\\${parallel_cmd[@]}"
exit_code=\\$?
echo "GNU parallel finished!"
# Unload reference
echo "Unloading reference genome"
printf "Unloading reference genome. "
STAR --genomeLoad Remove --genomeDir "\\$par_genomeDir"
echo "Done!"
# Exit code from GNU parallel:
# If fail=1 is used, the exit status will be the exit status of the failing job.
@@ -3333,7 +3386,7 @@ if ((exit_code>0)); then
HERE
)
printf \\$MESSAGE "\\$(<\\$log_file)"
printf "\\$MESSAGE" "\\$(<\\$par_joblog)"
exit 1
else
cat <<-HERE

View File

@@ -136,6 +136,17 @@
}
,
"joblog": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs.",
"help_text": "Type: `file`, default: `$id.$key.joblog.txt`. Where to store the log file listing all the jobs."
,
"default": "$id.$key.joblog.txt"
}
}
},

View File

@@ -190,7 +190,7 @@ build_info:
output: "target/nextflow/workflows/htrnaseq"
executable: "target/nextflow/workflows/htrnaseq/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/workflows/well_demultiplex"

View File

@@ -3030,7 +3030,7 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/htrnaseq",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -136,7 +136,7 @@ build_info:
output: "target/nextflow/workflows/utils/groupLanes"
executable: "target/nextflow/workflows/utils/groupLanes/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -2943,7 +2943,7 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/groupLanes",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -124,7 +124,7 @@ build_info:
output: "target/nextflow/workflows/utils/groupPairs"
executable: "target/nextflow/workflows/utils/groupPairs/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -2929,7 +2929,7 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/groupPairs",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -156,7 +156,7 @@ build_info:
output: "target/nextflow/workflows/utils/groupWells"
executable: "target/nextflow/workflows/utils/groupWells/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -2967,7 +2967,7 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/groupWells",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -142,7 +142,7 @@ build_info:
output: "target/nextflow/workflows/utils/splitWells"
executable: "target/nextflow/workflows/utils/splitWells/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -2950,7 +2950,7 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/splitWells",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -151,7 +151,7 @@ build_info:
output: "target/nextflow/workflows/well_demultiplex"
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
viash_version: "0.9.0-RC6"
git_commit: "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02"
git_commit: "89142447a27bbb6c7ebe6212f093176ce8b42934"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"

View File

@@ -2969,7 +2969,7 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/well_demultiplex",
"viash_version" : "0.9.0-RC6",
"git_commit" : "a1aaf1d7a91e6d79c00b86ead7e55bc957ba5c02",
"git_commit" : "89142447a27bbb6c7ebe6212f093176ce8b42934",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {