diff --git a/_viash.yaml b/_viash.yaml index 399d65c..15f3b5d 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -17,7 +17,7 @@ config_mods: | repositories: - name: biobox type: vsh - repo: vsh/biobox + repo: biobox tag: main - name: craftbox type: vsh diff --git a/src/bbmap_bbsplit/config.vsh.yaml b/src/bbmap_bbsplit/config.vsh.yaml deleted file mode 100644 index 0bb1f25..0000000 --- a/src/bbmap_bbsplit/config.vsh.yaml +++ /dev/null @@ -1,89 +0,0 @@ -name: "bbmap_bbsplit" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/bbmap/bbsplit/main.nf, modules/nf-core/bbmap/bbsplit/meta.yml] - last_sha: 277bd337739a8b8f753fa7b5eda6743b9b6acb89 - -description: | - Split sequencing reads by mapping them to multiple references simultaneously. - -argument_groups: -- name: "Input" - arguments: - - name: "--id" - type: string - description: Sample ID - - name: "--paired" - type: boolean - default: false - description: Paired fastq files or not? - - name: "--input" - type: file - multiple: true - multiple_sep: "," - description: Input fastq files, either one or two (paired) - example: sample.fastq - - name: "--primary_ref" - type: file - description: Primary reference FASTA - - name: "--bbsplit_fasta_list" - type: file - description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. - - name: "--only_build_index" - type: boolean - description: true = only build index; false = mapping - - name: "--built_bbsplit_index" - type: file - description: Directory with index files - -- name: "Output" - arguments: - - name: "--fastq_1" - type: file - required: false - description: Output file for read 1. - direction: output - must_exist: false - default: $id.$key.read_1.fastq - - name: "--fastq_2" - type: file - required: false - must_exist: false - description: Output file for read 2. 
- direction: output - default: $id.$key.read_2.fastq - - name: "--bbsplit_index" - type: file - description: Directory with index files - direction: output - must_exist: false - default: BBSplit_index - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/reference/genome.fasta - - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz - - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz - - path: /testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa - - path: /testData/minimal_test/reference/bbsplit_fasta/human.fa - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: docker - run: | - apt-get update && \ - apt-get install -y build-essential openjdk-17-jdk wget tar && \ - wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \ - tar xzf BBMap_39.01.tar.gz && \ - cp -r bbmap/* /usr/local/bin -runners: - - type: executable - - type: nextflow diff --git a/src/bbmap_bbsplit/script.sh b/src/bbmap_bbsplit/script.sh deleted file mode 100755 index b207e07..0000000 --- a/src/bbmap_bbsplit/script.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "$tmpdir" -} -trap clean_up EXIT - -avail_mem=3072 - -if [ ! -d "$par_built_bbsplit_index" ]; then - other_refs=() - while IFS="," read -r name path - do - other_refs+=("ref_$name=$path") - done < "$par_bbsplit_fasta_list" -fi - -if $par_only_build_index; then - if [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then - bbsplit.sh \ - -Xmx${avail_mem}M \ - ref_primary="$par_primary_ref" ${other_refs[@]} \ - path=$par_bbsplit_index \ - threads=${meta_cpus:-1} - else - echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." 
- fi -else - IFS="," read -ra input <<< "$par_input" - tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") - index_files='' - if [ -d "$par_built_bbsplit_index" ]; then - index_files="path=$par_built_bbsplit_index" - elif [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then - index_files="ref_primary=$par_primary_ref ${other_refs[@]}" - else - echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." - fi - if $par_paired; then - bbsplit.sh \ - -Xmx${avail_mem}M \ - $index_files \ - threads=${meta_cpus:-1} \ - in=${input[0]} \ - in2=${input[1]} \ - basename=${tmpdir}/%_#.fastq \ - refstats=bbsplit_stats.txt - read1=$(find $tmpdir/ -iname primary_1*) - read2=$(find $tmpdir/ -iname primary_2*) - cp $read1 $par_fastq_1 - cp $read2 $par_fastq_2 - else - bbsplit.sh \ - -Xmx${avail_mem}M \ - $index_files \ - threads=${meta_cpus:-1} \ - in=${input[0]} \ - basename=${tmpdir}/%.fastq \ - refstats=bbsplit_stats.txt - read1=$(find $tmpdir/ -iname primary*) - cp $read1 $par_fastq_1 - fi -fi diff --git a/src/bbmap_bbsplit/test.sh b/src/bbmap_bbsplit/test.sh deleted file mode 100644 index 03ebfef..0000000 --- a/src/bbmap_bbsplit/test.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash - -echo ">>> Test $meta_functionality_name" - -cat > bbsplit_fasta_list.txt << HERE -sarscov2,$meta_resources_dir/sarscov2.fa -human,$meta_resources_dir/human.fa -HERE - -echo ">>> Building BBSplit index" -"$meta_executable" \ - --primary_ref "$meta_resources_dir/genome.fasta" \ - --bbsplit_fasta_list "bbsplit_fasta_list.txt" \ - --only_build_index true \ - --bbsplit_index "BBSplit_index" - -echo ">>> Check whether output exists" -[ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1 -[ -z "$(ls -A 'BBSplit_index')" ] && echo "BBSplit index is empty!" 
&& exit 1 - -echo ">>> Filtering ribosomal RNA reads" - -echo ">>> Testing with single-end reads and primary/non-primary FASTA files" -"$meta_executable" \ - --paired false \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \ - --only_build_index false \ - --primary_ref "$meta_resources_dir/genome.fasta" \ - --bbsplit_fasta_list "bbsplit_fasta_list.txt" \ - --fastq_1 "filtered_SRR6357070_1.fastq.gz" - -echo ">>> Check whether output exists" -[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file is empty!" && exit 1 - -rm filtered_SRR6357070_1.fastq.gz - -echo ">>> Testing with paired-end reads and primary/non-primary FASTA files" -"$meta_executable" \ - --paired true \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz" \ - --only_build_index false \ - --primary_ref "$meta_resources_dir/genome.fasta" \ - --bbsplit_fasta_list "bbsplit_fasta_list.txt" \ - --fastq_1 "filtered_SRR6357070_1.fastq.gz" \ - --fastq_2 "filtered_SRR6357070_2.fastq.gz" - -echo ">>> Check whether output exists" -[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file is empty!" && exit 1 -[ ! -f "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file is empty!" && exit 1 - -rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz - -echo ">>> Testing with single-end reads and BBSplit index" -"$meta_executable" \ - --paired false \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \ - --only_build_index false \ - --built_bbsplit_index "BBSplit_index" \ - --fastq_1 "filtered_SRR6357070_1.fastq.gz" - -echo ">>> Check whether output exists" -[ ! 
-f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file is empty!" && exit 1 - -echo ">>> Testing with paired-end reads and BBSplit index" -"$meta_executable" \ - --paired true \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz" \ - --only_build_index false \ - --built_bbsplit_index "BBSplit_index" \ - --fastq_1 "filtered_SRR6357070_1.fastq.gz" \ - --fastq_2 "filtered_SRR6357070_2.fastq.gz" - -echo ">>> Check whether output exists" -[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file is empty!" && exit 1 -[ ! -f "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file does not exist!" && exit 1 -[ ! -s "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file is empty!" && exit 1 - -rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz - -echo "All tests succeeded!" -exit 0 \ No newline at end of file diff --git a/src/fastqc/config.vsh.yaml b/src/fastqc/config.vsh.yaml deleted file mode 100644 index 57e7a8b..0000000 --- a/src/fastqc/config.vsh.yaml +++ /dev/null @@ -1,71 +0,0 @@ -name: "fastqc" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/fastqc/main.nf, modules/nf-core/fastqc/meta.yml] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory. - -argument_groups: -- name: "Input" - arguments: - - name: "--paired" - type: boolean - required: false - default: false - description: Paired fastq files or not? 
- - name: "--input" - type: file - required: true - multiple: true - multiple_sep: "," - description: Input fastq files, either one or two (paired) - example: sample.fastq - -- name: "Output" - arguments: - - name: "--fastqc_html_1" - type: file - direction: output - description: FastQC HTML report for read 1. - default: $id.read_1.fastqc.html - - name: "--fastqc_html_2" - type: file - direction: output - description: FastQC HTML report for read 2. - required: false - must_exist: false - default: $id.read_2.fastqc.html - - name: "--fastqc_zip_1" - type: file - direction: output - description: FastQC report archive for read 1. - default: $id.read_1.fastqc.zip - - name: "--fastqc_zip_2" - type: file - direction: output - description: FastQC report archive for read 2. - required: false - must_exist: false - default: $id.read_2.fastqc.zip - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz - - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz - -engines: - - type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [ fastqc ] -runners: - - type: executable - - type: nextflow diff --git a/src/fastqc/script.sh b/src/fastqc/script.sh deleted file mode 100644 index 808d300..0000000 --- a/src/fastqc/script.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "$tmpdir" -} -trap clean_up EXIT - -tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX") - -IFS="," read -ra input <<< $par_input -count=${#input[@]} - -if $par_paired; then - echo "Paired - $count" - if [ $count -ne 2 ]; then - echo "Paired end input requires two files" - exit 1 - fi -else - echo "Not Paired - $count" - if [ $count -ne 1 ]; then - echo "Single end input requires one file" - exit 1 - fi -fi - -fastqc -o $tmpdir ${input[*]} - -file1=$(basename -- "${input[0]}") -read1="${file1%.fastq*}" -[[ -e 
"${tmpdir}/${read1}_fastqc.html" ]] && cp "${tmpdir}/${read1}_fastqc.html" $par_fastqc_html_1 -[[ -e "${tmpdir}/${read1}_fastqc.zip" ]] && cp "${tmpdir}/${read1}_fastqc.zip" $par_fastqc_zip_1 - -if $par_paired; then - file2=$(basename -- "${input[1]}") - read2="${file2%.fastq*}" - [[ -e "${tmpdir}/${read2}_fastqc.html" ]] && cp "${tmpdir}/${read2}_fastqc.html" $par_fastqc_html_2 - [[ -e "${tmpdir}/${read2}_fastqc.zip" ]] && cp "${tmpdir}/${read2}_fastqc.zip" $par_fastqc_zip_2 -fi \ No newline at end of file diff --git a/src/fastqc/test.sh b/src/fastqc/test.sh deleted file mode 100644 index d66a1c9..0000000 --- a/src/fastqc/test.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -echo ">>> Testing for paired-end reads" - -"$meta_executable" \ - --paired true \ - --input $meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz \ - --fastqc_html_1 SRR6357070_1.html \ - --fastqc_html_2 SRR6357070_2.html \ - --fastqc_zip_1 SRR6357070_1.zip \ - --fastqc_zip_2 SRR6357070_2.zip - -echo ">> Checking if the correct files are present" -[[ ! -f "SRR6357070_1.html" ]] || [[ ! -f "SRR6357070_2.html" ]] && echo "Report file missing" && exit 1 -[[ ! -s "SRR6357070_1.html" ]] || [[ ! -s "SRR6357070_2.html" ]] && echo "Report file empty" && exit 1 -[[ ! -f "SRR6357070_1.zip" ]] || [[ ! -f "SRR6357070_2.zip" ]] && echo "Zip file missing" && exit 1 - -rm SRR6357070_1.html SRR6357070_2.html SRR6357070_1.zip SRR6357070_2.zip - -echo ">>> Testing for single-end reads" -"$meta_executable" \ - --paired false \ - --input $meta_resources_dir/SRR6357070_1.fastq.gz \ - --fastqc_html_1 SRR6357070_1.html \ - --fastqc_zip_1 SRR6357070_1.zip - -echo ">> Checking if the correct files are present" -[ ! -f "SRR6357070_1.html" ] && echo "Report file missing" && exit 1 -[ ! -s "SRR6357070_1.html" ] && echo "Report file empty" && exit 1 -[ ! 
-f "SRR6357070_1.zip" ] && echo "Zip file missing" && exit 1 - -echo ">>> Test finished successfully" -exit 0 diff --git a/src/fq_subsample/config.vsh.yaml b/src/fq_subsample/config.vsh.yaml deleted file mode 100644 index 893426d..0000000 --- a/src/fq_subsample/config.vsh.yaml +++ /dev/null @@ -1,66 +0,0 @@ -name: "fq_subsample" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/fq/subsample/main.nf, modules/nf-core/fq/subsample/meta.yml] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - description: Input fastq files to subsample - multiple: true - multiple_sep: ";" - - name: "--extra_args" - type: string - default: "" - description: Extra arguments to pass to fq subsample - -- name: "Input" - arguments: - - name: "--output_1" - type: file - direction: output - default: $id.read_1.subsampled.fastq - description: Sampled read 1 fastq files - - name: "--output_2" - type: file - must_exist: false - direction: output - default: $id.read_2.subsampled.fastq - description: Sampled read 2 fastq files - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz - - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz - -engines: - - type: docker - image: ubuntu:22.04 - setup: - - type: docker - env: - - TZ=Europe/Brussels - run: | - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ - apt-get update && \ - apt-get install -y --no-install-recommends build-essential git-all curl && \ - curl https://sh.rustup.rs -sSf | sh -s -- -y && \ - . 
"$HOME/.cargo/env" && \ - git clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \ - mv fq /usr/local/ && cd /usr/local/fq && \ - cargo install --locked --path . && \ - mv /usr/local/fq/target/release/fq /usr/local/bin/ -runners: - - type: executable - - type: nextflow diff --git a/src/fq_subsample/script.sh b/src/fq_subsample/script.sh deleted file mode 100644 index 26b0636..0000000 --- a/src/fq_subsample/script.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -IFS=";" read -ra input <<< $par_input -n_fastq=${#input[@]} - -required_args=("-p" "--probability" "-n" "--read-count") -for arg in "${required_args[@]}"; do - if [[ "$par_extra_args" == *"$arg"* ]]; then - echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args" - exit 1 - fi -done - -if [ $n_fastq -eq 1 ]; then - fq subsample $par_extra_args ${input[*]} --r1-dst $par_output_1 -elif [ $n_fastq -eq 2 ]; then - fq subsample $par_extra_args ${input[*]} --r1-dst $par_output_1 --r2-dst $par_output_2 -else - echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!" - exit 1 -fi diff --git a/src/fq_subsample/test.sh b/src/fq_subsample/test.sh deleted file mode 100644 index 6408054..0000000 --- a/src/fq_subsample/test.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -echo ">>> Testing for paired-end reads" -"$meta_executable" \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz;$meta_resources_dir/SRR6357070_2.fastq.gz" \ - --extra_args '--record-count 1000000 --seed 1' \ - --output_1 SRR6357070_1.subsampled.fastq.gz \ - --output_2 SRR6357070_2.subsampled.fastq.gz - -echo ">> Checking if the correct files are present" -[ ! -f "SRR6357070_1.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file for read 1 is missing!" && exit 1 -[ ! -s "SRR6357070_1.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file is empty!" && exit 1 -[ ! 
-f "SRR6357070_2.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file for read 2 is missing" && exit 1 -[ ! -s "SRR6357070_2.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file is empty" && exit 1 - -rm SRR6357070_1.subsampled.fastq.gz SRR6357070_2.subsampled.fastq.gz - -echo ">>> Testing for single-end reads" -"$meta_executable" \ - --input $meta_resources_dir/SRR6357070_1.fastq.gz \ - --extra_args '--record-count 1000000 --seed 1' \ - --output_1 SRR6357070_1.subsampled.fastq.gz - -echo ">> Checking if the correct files are present" -[ ! -f "SRR6357070_1.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file is missing" && exit 1 -[ ! -s "SRR6357070_1.subsampled.fastq.gz" ] && echo "Subsampled FASTQ file is empty" && exit 1 - -echo ">>> Tests finished successfully" -exit 0 - diff --git a/src/kallisto/kallisto_index/config.vsh.yaml b/src/kallisto/kallisto_index/config.vsh.yaml deleted file mode 100644 index ec47d37..0000000 --- a/src/kallisto/kallisto_index/config.vsh.yaml +++ /dev/null @@ -1,49 +0,0 @@ -name: kallisto_index -namespace: kallisto -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/kallisto/index/main.nf, modules/nf-core/kallisto/index/meta.yml] - last_sha: c0816976384d5e7ee6079c29c45958df1ffa0ee4 -description: | - Create Kallisto index. - -argument_groups: -- name: "Input" - arguments: - - name: "--transcriptome_fasta" - type: file - - name: "--pseudo_aligner_kmer_size" - type: integer - description: Kmer length passed to indexing step of pseudoaligners. 
- -- name: "Output" - arguments: - - name: "--kallisto_index" - type: file - direction: output - default: Kallisto_index - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/reference/transcriptome.fasta - -engines: - - type: docker - image: ubuntu:22.04 - setup: - - type: docker - run: | - apt-get update && \ - apt-get install -y --no-install-recommends wget && \ - wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \ - tar -xzf kallisto_linux-v0.50.1.tar.gz && \ - mv kallisto/kallisto /usr/local/bin/ -runners: - - type: executable - - type: nextflow diff --git a/src/kallisto/kallisto_index/script.sh b/src/kallisto/kallisto_index/script.sh deleted file mode 100644 index 1aec336..0000000 --- a/src/kallisto/kallisto_index/script.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -kallisto index \ - ${par_pseudo_aligner_kmer_size:+-k $par_pseudo_aligner_kmer_size} \ - -i $par_kallisto_index \ - $par_transcriptome_fasta diff --git a/src/kallisto/kallisto_index/test.sh b/src/kallisto/kallisto_index/test.sh deleted file mode 100644 index 3973d77..0000000 --- a/src/kallisto/kallisto_index/test.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -"$meta_executable" \ - --transcriptome_fasta "$meta_resources_dir/transcriptome.fasta" \ - --kallisto_index Kallisto - -echo ">>> Checking whether output exists" -[ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1 -[ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1 - -echo "All tests succeeded!" 
-exit 0 diff --git a/src/kallisto/kallisto_quant/config.vsh.yaml b/src/kallisto/kallisto_quant/config.vsh.yaml deleted file mode 100644 index b7ad9bb..0000000 --- a/src/kallisto/kallisto_quant/config.vsh.yaml +++ /dev/null @@ -1,88 +0,0 @@ -name: kallisto_quant -namespace: kallisto -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/kallisto/quant/main.nf, modules/nf-core/kallisto/quant/meta.yml] - last_sha: aff1d2e02717247831644769fc3ba84868c3fdde -description: | - Computes equivalence classes for reads and quantifies abundances. - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - multiple: true - multiple_sep: "," - description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - - name: "--paired" - type: boolean - description: Paired reads or not. - - name: "--strandedness" - type: string - description: Sample strand-specificity. - - name: "--index" - type: file - description: Kallisto genome index. - - name: "--gtf" - type: file - description: Optional gtf file for translation of transcripts into genomic coordinates. - - name: "--chromosomes" - type: file - description: Optional tab separated file with chromosome names and lengths. - - name: "--fragment_length" - type: integer - description: For single-end mode only, the estimated average fragment length. - - name: "--fragment_length_sd" - type: integer - description: For single-end mode only, the estimated standard deviation of the fragment length. 
- -- name: "Output" - arguments: - - name: "--output" - type: file - description: Kallisto quant results - default: "$id.kallisto_quant_results" - direction: output - - name: "--log" - type: file - description: File containing log information from running kallisto quant - default: "$id.kallisto_quant.log.txt" - direction: output - - name: "--run_info" - type: file - description: A json file containing information about the run - default: "$id.run_info.json" - direction: output - - name: "--quant_results_file" - type: file - description: TSV file containing abundance estimates from Kallisto - direction: output - default: $id.abundance.tsv - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/reference/transcriptome.fasta - - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz - - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz - -engines: - - type: docker - image: ubuntu:22.04 - setup: - - type: docker - run: | - apt-get update && \ - apt-get install -y --no-install-recommends wget && \ - wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \ - tar -xzf kallisto_linux-v0.50.1.tar.gz && \ - mv kallisto/kallisto /usr/local/bin/ -runners: - - type: executable - - type: nextflow diff --git a/src/kallisto/kallisto_quant/script.sh b/src/kallisto/kallisto_quant/script.sh deleted file mode 100644 index cba09f4..0000000 --- a/src/kallisto/kallisto_quant/script.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -IFS="," read -ra input <<< $par_input - -single_end_params='' -if [ $par_paired == "false" ]; then - if [[ $par_fragment_length < 0 ]] || [[ ! 
$fragment_length_sd < 0 ]]; then - echo "fragment_length and fragment_length_sd must be set for single-end data" - exit 1 - fi - single_end_params="--single --fragment-length $par_fragment_length --sd $par_fragment_length_sd" -fi - -strandedness='' -if [[ "$par_extra_args" != *"--fr-stranded"* ]] && [[ "$par_extra_args" != *"--rf-stranded"* ]]; then - if [ "$par_strandedness" == 'forward' ]; then - strandedness='--fr-stranded' - elif [ "$par_strandedness" == 'reverse' ]; then - strandedness='--rf-stranded' - fi -fi - -mkdir -p $par_output - -kallisto quant \ - ${meta_cpus:+--threads $meta_cpus} \ - --index $par_index \ - ${par_gtf:+--gtf $par_gtf} \ - ${par_chromosomes:+--chromosomes $par_chromosomes} \ - $single_end_params \ - $strandedness \ - $par_extra_args \ - -o $par_output \ - ${input[*]} 2> >(tee -a ${par_output}/kallisto_quant.log >&2) - -mv ${par_output}/kallisto_quant.log ${par_log} -mv ${par_output}/run_info.json ${par_run_info} -cp ${par_output}/abundance.tsv ${par_quant_results_file} diff --git a/src/kallisto/kallisto_quant/test.sh b/src/kallisto/kallisto_quant/test.sh deleted file mode 100644 index 248e896..0000000 --- a/src/kallisto/kallisto_quant/test.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -echo ">>> Generating Kallisto index" -kallisto index \ - -i index \ - $meta_resources_dir/transcriptome.fasta - -echo ">>> Testing for paired-end reads" -"$meta_executable" \ - --index index \ - --paired true \ - --strandedness reverse \ - --output paired_end_test \ - --input "SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz" \ - --log quant_pe.log \ - --run_info pe_run_info.json - -echo ">>> Checking whether output exists" -[ ! -d "paired_end_test" ] && echo "Kallisto results do not exist!" && exit 1 -[ ! -f "quant_pe.log" ] && echo "quant_pe.log does not exist!" && exit 1 -[ ! -s "quant_pe.log" ] && echo "quant_pe.log is empty!" && exit 1 -[ ! -f "pe_run_info.json" ] && echo "pe_run_info.json does not exist!" 
&& exit 1 -[ ! -s "pe_run_info.json" ] && echo "pe_run_info.json is empty!" && exit 1 -[ ! -f "paired_end_test/abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1 -[ ! -s "paired_end_test/abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1 -[ ! -f "paired_end_test/abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1 -[ ! -s "paired_end_test/abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1 - -echo ">>> Testing for single-end reads" -"$meta_executable" \ - --index index \ - --paired false \ - --strandedness "reverse" \ - --output single_end_test \ - --input "SRR6357070_1.fastq.gz" \ - --log quant_se.log \ - --run_info se_run_info.json \ - --fragment_length 101 \ - --fragment_length_sd 50 - -echo ">>> Checking whether output exists" -[ ! -d "single_end_test" ] && echo "Kallisto results do not exist!" && exit 1 -[ ! -f "quant_se.log" ] && echo "quant_se.log does not exist!" && exit 1 -[ ! -s "quant_se.log" ] && echo "quant_se.log is empty!" && exit 1 -[ ! -f "se_run_info.json" ] && echo "se_run_info.json does not exist!" && exit 1 -[ ! -s "se_run_info.json" ] && echo "se_run_info.json is empty!" && exit 1 -[ ! -f "single_end_test/abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1 -[ ! -s "single_end_test/abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1 -[ ! -f "single_end_test/abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1 -[ ! -s "single_end_test/abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1 - -echo "All tests succeeded!" 
-exit 0 diff --git a/src/qualimap/config.vsh.yaml b/src/qualimap/config.vsh.yaml deleted file mode 100644 index a3a3852..0000000 --- a/src/qualimap/config.vsh.yaml +++ /dev/null @@ -1,118 +0,0 @@ -name: "qualimap" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/qualimap/rnaseq/main.nf] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - RNA-seq QC analysis using the qualimap - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - required: true - description: path to input mapping file in BAM format. - - - name: "--gtf" - type: file - required: true - description: path to annotations file in Ensembl GTF format. - -- name: "Output" - arguments: - - name: "--output_dir" - direction: output - type: file - required: false - default: $id.qualimap_output - description: path to output directory for raw data and report. - - - name: "--output_pdf" - type: file - direction: output - required: false - must_exist: false - default: $id.report.pdf - description: path to output file for pdf report. - - - name: "--output_format" - type: string - required: false - default: html - description: Format of the output report (PDF or HTML, default is HTML) - -- name: "Optional" - arguments: - - name: "--pr_bases" - type: integer - required: false - default: 100 - min: 1 - description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100). - - - name: "--tr_bias" - type: integer - required: false - default: 1000 - min: 1 - description: Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000). - - - name: "--algorithm" - type: string - required: false - default: uniquely-mapped-reads - description: Counting algorithm (uniquely-mapped-reads (default) or proportional). 
- - - name: "--sequencing_protocol" - type: string - required: false - choices: ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] - default: non-strand-specific - description: Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)). - - - name: "--paired" - type: boolean_true - description: Setting this flag for paired-end experiments will result in counting fragments instead of reads. - - - name: "--sorted" - type: boolean_true - description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only requiredfor paired-end analysis. - - - name: "--java_memory_size" - type: string - required: false - default: 4G - description: maximum Java heap memory size, default = 4G. - -resources: - - type: bash_script - path: script.sh -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam - - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai - - path: /testData/unit_test_resources/genes.gtf - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [ r-base, unzip, wget, openjdk-8-jdk, libxml2-dev, libcurl4-openssl-dev ] - - type: docker - run: | - wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \ - unzip qualimap_v2.3.zip && \ - cp -a qualimap_v2.3/. 
usr/bin && \ - unset DISPLAY && \ - mkdir -p tmp && \ - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - - type: r - bioc: [ NOISeqr ] - cran: [ optparse ] -runners: -- type: executable -- type: nextflow diff --git a/src/qualimap/script.sh b/src/qualimap/script.sh deleted file mode 100644 index 25a47a5..0000000 --- a/src/qualimap/script.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -mkdir -p $par_output_dir - -qualimap rnaseq \ - --java-mem-size=$par_java_memory_size \ - --algorithm $par_algorithm \ - --num-pr-bases $par_pr_bases \ - --num-tr-bias $par_tr_bias \ - --sequencing-protocol $par_sequencing_protocol \ - -bam $par_input \ - -gtf $par_gtf \ - ${par_paired:+-pe} \ - ${par_sorted:+-s} \ - -outdir $par_output_dir \ - -outformat $par_output_format - diff --git a/src/qualimap/test.sh b/src/qualimap/test.sh deleted file mode 100644 index b45df2a..0000000 --- a/src/qualimap/test.sh +++ /dev/null @@ -1,24 +0,0 @@ -echo "> Running $meta_functionality_name." - -# define input and output for script -input_bam="$meta_resources_dir/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" -input_gtf="$meta_resources_dir/genes.gtf" -output_dir="qualimap_output" - -"$meta_executable" \ - --input "$input_bam" \ - --gtf "$input_gtf" \ - --output_dir "$output_dir" - -exit_code=$? -[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 - -echo ">> Checking whether output dir and files exists" - -[ ! -d "$output_dir" ] && echo "Output dir could not be found!" && exit 1 -[ ! -d "$output_dir/raw_data_qualimapReport" ] && echo "Raw data folder could not be found!" && exit 1 -[ -z $(ls -A "$output_dir/raw_data_qualimapReport") ] && echo "Raw data folder is missing output files" && exit 1 -[ ! -f "$output_dir/qualimapReport.html" ] && echo "Qualimap report was not found" && exit 1 -[ ! 
-s "$output_dir/qualimapReport.html" ] && echo "Qualimap report is empty" && exit 1 - -exit 0 \ No newline at end of file diff --git a/src/rsem/rsem_calculate_expression/config.vsh.yaml b/src/rsem/rsem_calculate_expression/config.vsh.yaml deleted file mode 100644 index b556b78..0000000 --- a/src/rsem/rsem_calculate_expression/config.vsh.yaml +++ /dev/null @@ -1,135 +0,0 @@ -name: "rsem_calculate_expression" -namespace: "rsem" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/rsem/calculateexpression/main.nf, modules/nf-core/rsem/calculateexpression/meta.yml] - last_sha: 92b2a7857de1dda9d1c19a088941fc81e2976ff7 - -description: | - Calculate expression with RSEM. - -argument_groups: -- name: "Input" - arguments: - - name: "--id" - type: string - description: Sample ID. - - name: "--strandedness" - type: string - description: Sample strand-specificity. Must be one of unstranded, forward, reverse - choices: [forward, reverse, unstranded] - - name: "--paired" - type: boolean - description: Paired-end reads or not? - - name: "--input" - type: file - description: Input reads for quantification. - multiple: true - multiple_sep: ";" - - name: "--index" - type: file - description: RSEM index. - - name: "--extra_args" - type: string - description: Extra rsem-calculate-expression arguments in addition to the defaults. 
- -- name: "Output" - arguments: - - name: "--counts_gene" - type: file - description: Expression counts on gene level - example: sample.genes.results - direction: output - - name: "--counts_transcripts" - type: file - description: Expression counts on transcript level - example: sample.isoforms.results - direction: output - - name: "--stat" - type: file - description: RSEM statistics - example: sample.stat - direction: output - - name: "--logs" - type: file - description: RSEM logs - example: sample.log - direction: output - - name: "--bam_star" - type: file - description: BAM file generated by STAR (optional) - example: sample.STAR.genome.bam - direction: output - - name: "--bam_genome" - type: file - description: Genome BAM file (optional) - example: sample.genome.bam - direction: output - - name: "--bam_transcript" - type: file - description: Transcript BAM file (optional) - example: sample.transcript.bam - direction: output - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz - - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz - - path: /testData/minimal_test/reference/rsem.tar.gz - -# TODO: Install bowtie/bowtie2 -engines: - - type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: - - build-essential - - gcc - - g++ - - make - - wget - - zlib1g-dev - - unzip - - xxd - - perl - - r-base - - bowtie2 - - python3-pip - - git - - type: docker - env: - - STAR_VERSION=2.7.11b - - RSEM_VERSION=1.3.3 - - TZ=Europe/Brussels - run: | - ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ - cd /tmp && \ - wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ - unzip ${STAR_VERSION}.zip && \ - cd STAR-${STAR_VERSION}/source && \ - make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ - cp STAR /usr/local/bin && \ - cd /tmp && \ - wget 
--no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \ - unzip v${RSEM_VERSION}.zip && \ - cd RSEM-${RSEM_VERSION} && \ - make && \ - make install && \ - rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ - rm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \ - cd && \ - apt-get clean && \ - echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \ - echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \ - /bin/bash -c "source /etc/profile && source ~/.bashrc && echo $PATH && which STAR" - -runners: - - type: executable - - type: nextflow \ No newline at end of file diff --git a/src/rsem/rsem_calculate_expression/script.sh b/src/rsem/rsem_calculate_expression/script.sh deleted file mode 100755 index 8007bb7..0000000 --- a/src/rsem/rsem_calculate_expression/script.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "$tmpdir" -} -trap clean_up EXIT - -tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") - -[[ "$par_paired" == "false" ]] && unset par_paired - -if [ $par_strandedness == 'forward' ]; then - strandedness='--strandedness forward' -elif [ $par_strandedness == 'reverse' ]; then - strandedness='--strandedness reverse' -else - strandedness='' -fi - -IFS=";" read -ra input <<< $par_input - -INDEX=`find -L $par_index/ -name "*.grp" | sed 's/\.grp$//'` - -rsem-calculate-expression \ - ${meta_cpus:+--num-threads $meta_cpus} \ - $strandedness \ - ${par_paired:+--paired-end} \ - $par_extra_args \ - ${input[*]} \ - $INDEX \ - $par_id - -[[ -e "${par_id}.genes.results" ]] && mv "${par_id}.genes.results" $par_counts_gene -[[ -e "${par_id}id.isoforms.results" ]] && mv "${par_id}id.isoforms.results" $par_counts_transcripts -[[ -e "${par_id}.stat" ]] && mv "${par_id}.stat" $par_stat -# [[ -e "${par_id}.log" ]] && mv "${par_id}.log" $par_logs -[[ -e "${par_id}.STAR.genome.bam" ]] && mv "${par_id}.STAR.genome.bam" $par_bam_star -[[ -e 
"${par_id}.genome.bam" ]] && mv "${par_id}.genome.bam" $par_bam_genome -[[ -e "${par_id}.transcript.bam" ]] && mv "${par_id}.transcript.bam" $par_bam_transcript \ No newline at end of file diff --git a/src/rsem/rsem_calculate_expression/test.sh b/src/rsem/rsem_calculate_expression/test.sh deleted file mode 100644 index 5641ec7..0000000 --- a/src/rsem/rsem_calculate_expression/test.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -tar -xavf $meta_resources_dir/rsem.tar.gz - -echo ">>> Calculating expression" -"$meta_executable" \ - --id WT_REP1 \ - --strandedness reverse \ - --paired true \ - --input "$meta_resources_dir/SRR6357070_1.fastq.gz;$meta_resources_dir/SRR6357070_2.fastq.gz" \ - --index rsem \ - --extra_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \ - --counts_gene WT_REP1.genes.results \ - --counts_transctips WT_REP1.isoforms.results \ - --logs WT_REP1.log - -echo ">>> Checking whether output exists" -[ ! -f "WT_REP1.genes.results" ] && echo "Gene level expression counts file does not exist!" && exit 1 -[ ! -s "WT_REP1.genes.results" ] && echo "Gene level expression counts file is empty!" && exit 1 -[ ! -f "WT_REP1.log" ] && echo "Log file does not exist!" && exit 1 -[ ! -s "WT_REP1.log" ] && echo "Log file is empty!" && exit 1 - -echo "All tests succeeded!" 
-exit 0 diff --git a/src/rsem/rsem_merge_counts/config.vsh.yaml b/src/rsem_merge_counts/config.vsh.yaml similarity index 98% rename from src/rsem/rsem_merge_counts/config.vsh.yaml rename to src/rsem_merge_counts/config.vsh.yaml index a814662..823bad7 100644 --- a/src/rsem/rsem_merge_counts/config.vsh.yaml +++ b/src/rsem_merge_counts/config.vsh.yaml @@ -1,5 +1,4 @@ name: "rsem_merge_counts" -namespace: "rsem" info: migration_info: git_repo: https://github.com/nf-core/rnaseq.git diff --git a/src/rsem/rsem_merge_counts/script.sh b/src/rsem_merge_counts/script.sh similarity index 100% rename from src/rsem/rsem_merge_counts/script.sh rename to src/rsem_merge_counts/script.sh diff --git a/src/rseqc/rseqc_bamstat/config.vsh.yaml b/src/rseqc/rseqc_bamstat/config.vsh.yaml deleted file mode 100644 index 6dc0a9e..0000000 --- a/src/rseqc/rseqc_bamstat/config.vsh.yaml +++ /dev/null @@ -1,53 +0,0 @@ -name: "rseqc_bamstat" -namespace: "rseqc" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/rseqc/bamstat/main.nf] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - Generate statistics from a bam file. - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - required: true - description: input alignment file in BAM or SAM format - - - name: "--map_qual" - type: integer - required: false - default: 30 - description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. 
- min: 0 - -- name: "Output" - arguments: - - name: "--output" - type: file - direction: output - required: false - default: $id.mapping_quality.txt - description: output file (txt) with mapping quality statistics - -resources: - - type: bash_script - path: script.sh -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [ python3-pip ] - - type: python - packages: [ RSeQC ] -runners: -- type: executable -- type: nextflow diff --git a/src/rseqc/rseqc_bamstat/script.sh b/src/rseqc/rseqc_bamstat/script.sh deleted file mode 100644 index dcc8bd0..0000000 --- a/src/rseqc/rseqc_bamstat/script.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -bam_stat.py \ - --input $par_input \ - --mapq $par_map_qual \ -> $par_output diff --git a/src/rseqc/rseqc_bamstat/test.sh b/src/rseqc/rseqc_bamstat/test.sh deleted file mode 100644 index 91be990..0000000 --- a/src/rseqc/rseqc_bamstat/test.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# define input and output for script - -input_bam="test.paired_end.sorted.bam" -output_summary="mapping_quality.txt" - -# run executable and tests -echo "> Running $meta_functionality_name." - -"$meta_executable" \ - --input "$meta_resources_dir/$input_bam" \ - --output "$output_summary" - -exit_code=$? -[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 - -echo ">> Checking whether output can be found and has content" - -[ ! -f "$output_summary" ] && echo "$output_summary file missing" && exit 1 -[ ! 
-s "$output_summary" ] && echo "$output_summary file is empty" && exit 1 - -exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inferexperiment/config.vsh.yaml b/src/rseqc/rseqc_inferexperiment/config.vsh.yaml deleted file mode 100644 index f7890e6..0000000 --- a/src/rseqc/rseqc_inferexperiment/config.vsh.yaml +++ /dev/null @@ -1,67 +0,0 @@ -name: "rseqc_inferexperiment" -namespace: "rseqc" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/rseqc/inferexperiment/main.nf] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - Infer strandedness from sequencing reads - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - required: true - description: input alignment file in BAM or SAM format - - - name: "--refgene" - type: file - required: true - description: Reference gene model in bed format - - - name: "--sample_size" - type: integer - required: false - default: 200000 - min: 1 - description: Numer of reads sampled from SAM/BAM file, default = 200000. - - - name: "--map_qual" - type: integer - required: false - default: 30 - description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. 
- min: 0 - -- name: "Output" - arguments: - - name: "--output" - type: file - direction: output - required: false - default: $id.strandedness.txt - description: output file (txt) of strandness report - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam - - path: /testData/unit_test_resources/sarscov2/test.bed12 - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [ python3-pip ] - - type: python - packages: [ RSeQC ] -runners: -- type: executable -- type: nextflow diff --git a/src/rseqc/rseqc_inferexperiment/script.sh b/src/rseqc/rseqc_inferexperiment/script.sh deleted file mode 100644 index b98f04a..0000000 --- a/src/rseqc/rseqc_inferexperiment/script.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -infer_experiment.py \ - -i $par_input \ - -r $par_refgene \ - -s $par_sample_size \ - -q $par_map_qual \ -> $par_output diff --git a/src/rseqc/rseqc_inferexperiment/test.sh b/src/rseqc/rseqc_inferexperiment/test.sh deleted file mode 100644 index bc2414a..0000000 --- a/src/rseqc/rseqc_inferexperiment/test.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# define input and output for script -input_bam="$meta_resources_dir/test.paired_end.sorted.bam" -input_bed="$meta_resources_dir/test.bed12" -output="strandedness.txt" - -# run executable and tests -echo "> Running $meta_functionality_name." - -"$meta_executable" \ - --input "$input_bam" \ - --refgene "$input_bed" \ - --output "$output" - -exit_code=$? -[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 - -echo ">> Checking whether output can be found and has content" - -[ ! -f "$output" ] && echo "$output is missing" && exit 1 -[ ! 
-s "$output" ] && echo "$output is empty" && exit 1 - -exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_innerdistance/config.vsh.yaml b/src/rseqc/rseqc_innerdistance/config.vsh.yaml deleted file mode 100644 index 9d3b7fa..0000000 --- a/src/rseqc/rseqc_innerdistance/config.vsh.yaml +++ /dev/null @@ -1,117 +0,0 @@ -name: "rseqc_innerdistance" -namespace: "rseqc" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/rseqc/innerdistance/main.nf] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - Calculate inner distance between read pairs. - -argument_groups: -- name: "Input" - arguments: - - name: "--input" - type: file - required: true - description: input alignment file in BAM or SAM format - - - name: "--refgene" - type: file - required: true - description: Reference gene model in bed format - - - name: "--sample_size" - type: integer - required: false - default: 200000 - min: 1 - description: Numer of reads sampled from SAM/BAM file, default = 200000. - - - name: "--map_qual" - type: integer - required: false - default: 30 - description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. - min: 0 - - - name: "--lower_bound_size" - type: integer - required: false - default: -250 - description: Lower bound of inner distance (bp). This option is used for ploting histograme, default=-250. - - - name: "--upper_bound_size" - type: integer - required: false - default: 250 - description: Upper bound of inner distance (bp). This option is used for ploting histograme, default=250. - - - name: "--step_size" - type: integer - required: false - default: 5 - description: Step size (bp) of histograme. This option is used for plotting histogram, default=5. 
- -- name: "Output" - arguments: - - name: "--output_stats" - type: file - direction: output - required: false - must_exist: false - default: $id.inner_distance.stats - description: output file (txt) with summary statistics of inner distances of paired reads - - - name: "--output_dist" - type: file - direction: output - required: false - must_exist: false - default: $id.inner_distance.txt - description: output file (txt) with inner distances of all paired reads - - - name: "--output_freq" - type: file - direction: output - required: false - must_exist: false - default: $id.inner_distance_freq.txt - description: output file (txt) with frequencies of inner distances of all paired reads - - - name: "--output_plot" - type: file - direction: output - required: false - must_exist: false - default: $id.inner_distance_plot.pdf - description: output file (pdf) with histogram plot of of inner distances of all paired reads - - - name: "--output_plot_r" - type: file - direction: output - required: false - must_exist: false - default: $id.inner_distance_plot.r - description: output file (R) with script of histogram plot of of inner distances of all paired reads - -resources: - - type: bash_script - path: script.sh -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam - - path: /testData/unit_test_resources/sarscov2/test.bed12 - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [python3-pip, r-base] - - type: python - packages: [ RSeQC ] -runners: -- type: executable -- type: nextflow \ No newline at end of file diff --git a/src/rseqc/rseqc_innerdistance/script.sh b/src/rseqc/rseqc_innerdistance/script.sh deleted file mode 100644 index d4e5bc8..0000000 --- a/src/rseqc/rseqc_innerdistance/script.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -set -exo pipefail - -prefix=$(openssl rand -hex 8) - -inner_distance.py \ - -i $par_input \ - -r $par_refgene \ - -o $prefix \ - 
-k $par_sample_size \ - -l $par_lower_bound_size \ - -u $par_upper_bound_size \ - -s $par_step_size \ - -q $par_map_qual \ -> stdout.txt - -head -n 2 stdout.txt > $par_output_stats - -[[ -f "$prefix.inner_distance.txt" ]] && mv $prefix.inner_distance.txt $par_output_dist -[[ -f "$prefix.inner_distance_plot.pdf" ]] && mv $prefix.inner_distance_plot.pdf $par_output_plot -[[ -f "$prefix.inner_distance_plot.r" ]] && mv $prefix.inner_distance_plot.r $par_output_plot_r -[[ -f "$prefix.inner_distance_freq.txt" ]] && mv $prefix.inner_distance_freq.txt $par_output_freq diff --git a/src/rseqc/rseqc_innerdistance/test.sh b/src/rseqc/rseqc_innerdistance/test.sh deleted file mode 100644 index 832c42b..0000000 --- a/src/rseqc/rseqc_innerdistance/test.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -gunzip "$meta_resources_dir/hg19_RefSeq.bed.gz" - -# define input and output for script -input_bam="$meta_resources_dir/test.paired_end.sorted.bam" -input_bed="$meta_resources_dir/test.bed12" - -output_stats="inner_distance_stats.txt" -output_dist="inner_distance.txt" -output_plot="inner_distance_plot.pdf" -output_plot_r="inner_distance_plot.r" -output_freq="inner_distance_freq.txt" - -# Run executable -echo "> Running $meta_functionality_name" - -"$meta_executable" \ - --input $input_bam \ - --refgene $input_bed \ - --output_stats $output_stats \ - --output_dist $output_dist \ - --output_plot $output_plot \ - --output_plot_r $output_plot_r \ - --output_freq $output_freq - -exit_code=$? -[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 - -echo ">> asserting output has been created for paired read input" - -[ ! -f "$output_stats" ] && echo "$output_stats was not created" && exit 1 -[ ! -s "$output_stats" ] && echo "$output_stats is empty" && exit 1 -[ ! -f "$output_dist" ] && echo "$output_dist was not created" && exit 1 -[ ! -s "$output_dist" ] && echo "$output_dist is empty" && exit 1 -[ ! 
-f "$output_plot" ] && echo "$output_plot was not created" && exit 1 -[ ! -s "$output_plot" ] && echo "$output_plot is empty" && exit 1 -[ ! -f "$output_plot_r" ] && echo "$output_plot_r was not created" && exit 1 -[ ! -s "$output_plot_r" ] && echo "$output_plot_r is empty" && exit 1 -[ ! -f "$output_freq" ] && echo "$output_freq was not created" && exit 1 -[ ! -s "$output_freq" ] && echo "$output_freq is empty" && exit 1 - -exit 0 \ No newline at end of file diff --git a/src/sortmerna/config.vsh.yaml b/src/sortmerna/config.vsh.yaml index f957b0c..4df6fde 100644 --- a/src/sortmerna/config.vsh.yaml +++ b/src/sortmerna/config.vsh.yaml @@ -62,4 +62,4 @@ engines: image: quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 runners: - type: executable -- type: nextflow \ No newline at end of file +- type: nextflow diff --git a/src/sortmerna/script.sh b/src/sortmerna/script.sh index 253a885..8ef7c2d 100755 --- a/src/sortmerna/script.sh +++ b/src/sortmerna/script.sh @@ -39,3 +39,4 @@ else fi mv rRNA_reads.log $par_sortmerna_log + diff --git a/src/sortmerna/test.sh b/src/sortmerna/test.sh index 774e358..06ceb5d 100644 --- a/src/sortmerna/test.sh +++ b/src/sortmerna/test.sh @@ -38,3 +38,4 @@ echo ">> Checking if the correct files are present" echo ">>> Test finished successfully" exit 0 + diff --git a/src/trimgalore/config.vsh.yaml b/src/trimgalore/config.vsh.yaml deleted file mode 100644 index 6e405f0..0000000 --- a/src/trimgalore/config.vsh.yaml +++ /dev/null @@ -1,309 +0,0 @@ -name: trimgalore -description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. 
-keywords: ["trimming", "adapters"] -links: - homepage: https://github.com/FelixKrueger/TrimGalore - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - repository: https://github.com/FelixKrueger/TrimGalore -license: GPL-3.0 -requirements: - commands: [trim_galore] - -argument_groups: - - name: Input - arguments: - - name: "--input" - type: file - description: Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz - required: true - multiple: true - example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq - - name: Trimming options - arguments: - - name: --quality - alternatives: -q - type: integer - description: Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal). - example: 20 - required: false - - name: --phred33 - type: boolean - description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming. - required: false - - name: --phred64 - type: boolean - description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming. - required: false - - name: --fastqc - type: boolean - description: Run FastQC in the default mode on the FastQ file once trimming is complete. - required: false - - name: --fastqc_args - type: string - description: Passes extra arguments to FastQC. If more than one argument is to be passed to FastQC they must be in the form "arg1 arg2 ...". 
Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately. - required: false - example: "--nogroup --outdir /home/" - - name: --adapter - alternatives: -a - type: string - description: | - Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. - At a special request, multiple adapters can also be specified like so: - -a " AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT" -a2 " AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT", - or so: - -a "file:../multiple_adapters.fa" -a2 "file:../different_adapters.fa" - Potentially in conjucntion with the parameter "-n 3" to trim all adapters. - example: 20 - required: false - example: AGCTCCCG - - name: --adapter2 - alternatives: -a2 - type: string - description: Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires '--paired' to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA. - required: false - example: AGCTCCCG - - name: --illumina - type: boolean - description: Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence. - required: false - - name: --stranded_illumina - type: boolean - description: Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence. 
- required: false - - name: --nextera - type: boolean - description: Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence. - required: false - - name: --small_rna - type: boolean - description: Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly. - - name: --consider_already_trimmed - type: integer - description: During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to '-a X'). Quality trimming is still performed as usual. - required: false - - name: --max_length - type: integer - description: Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences. - required: false - - name: --stringency - type: integer - description: Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3' end of any read. - required: false - example: 1 - - name: --error_rate - alternatives: -e - type: double - description: Maximum allowed error rate (no. of errors divided by the length of the matching region) - required: false - example: 0.1 - - name: --gzip - type: boolean - description: Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. 
As of v0.2.8 the compression will take place on the fly. - required: false - - name: --dont_gzip - type: boolean - description: Output files won't be compressed with GZIP. This option overrides --gzip. - required: false - - name: --length - type: integer - description: Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of '0' effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option. - required: false - example: 20 - - name: --max_n - type: integer - description: The total number of Ns a read may contain before it will be removed altogether.In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length. - required: false - - name: --trim_n - type: boolean - description: Removes Ns from either side of the read. This option does currently not work in RRBS mode. - required: false - - name: --no_report_file - type: boolean - description: If specified no report file will be generated. - required: false - - name: --suppress_warn - type: boolean - description: If specified any output to STDOUT or STDERR will be suppressed. - required: false - - name: --clip_R1 - type: integer - description: Instructs TrimGalore to remove given number of bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. - required: false - - name: --clip_R2 - type: integer - description: Instructs TrimGalore to remove given number bp from the 5' end of read 2 (paired-end reads only). 
This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation. - required: false - - name: --three_prime_clip_R1 - type: integer - description: Instructs Trim Galore to remove spacified number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3' end that is not directly related to adapter sequence or basecall quality. - required: false - - name: --three_prime_clip_R2 - type: integer - description: Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. - required: false - - name: --nextseq - type: integer - description: This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exlusive with '-q INT'. - required: false - - name: --basename - type: string - description: Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. 
- required: false - - name: --cores - alternatives: -j - type: integer - description: Number of cores to be used for trimming - required: false - example: 1 - - name: Specific trimming options without adapter/quality trimming - arguments: - - name: --hardtrim5 - type: integer - description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._5prime.fq(.gz). - required: false - - name: --hardtrim3 - type: integer - description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._3prime.fq(.gz). - required: false - - name: --clock - type: boolean - description: In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock. - required: false - - name: --polyA - type: boolean - description: This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. 
This is currently done by writing tags to both the start ("32:A:") and end ("_PolyA:32") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming. - required: false - - name: --implicon - type: boolean - description: | - This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it's current implementation, the UMI carrying reads come in the following format - Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3' - Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5' - Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence. - required: false - - name: RRBS-specific options - arguments: - - name: --rrbs - type: boolean - description: Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below). 
- required: false - - name: --non_directional - type: boolean - description: Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. Note that this option does not set '--clip_r2 2' in paired-end mode. - required: false - - name: --keep - type: boolean - description: Keep the quality trimmed intermediate file. - required: false - - name: Paired-end specific options - arguments: - - name: --paired - type: boolean - description: This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... . - required: false - - name: --retain_unpaired - type: boolean - description: If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2. - required: false - - name: --length_1 - alternatives: -r1 - type: integer - description: Unpaired single-end read length cutoff needed for read 1 to be written to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode. 
- example: 35 - required: false - - name: --length_2 - alternatives: -r2 - type: integer - description: Unpaired single-end read length cutoff needed for read 2 to be written to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. - required: false - example: 35 - - name: Output - arguments: - - name: --output_dir - alternatives: -o - type: file - description: If specified all output will be written to this directory instead of the current directory. - direction: output - required: false - default: trimmed_output - - name: --trimmed_r1 - type: file - required: false - description: Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: read_1.fastq.gz - - name: --trimmed_r2 - type: file - required: false - description: Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: read_2.fastq.gz - - name: --trimming_report_r1 - type: file - required: false - description: Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: read_1.trimming_report.txt - - name: --trimming_report_r2 - type: file - description: Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - required: false - example: read_2.trimming_report.txt - - name: --trimmed_fastqc_html_1 - type: file - required: false - description: FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: read_1.fastqc.html - - name: --trimmed_fastqc_html_2 - type: file - description: FastQC report for trimmed reads (read2 for paired-end). 
Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - required: false - example: read_2.fastqc.html - - name: --trimmed_fastqc_zip_1 - type: file - required: false - description: FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: read_1.fastqc.zip - - name: --trimmed_fastqc_zip_2 - type: file - description: FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - required: false - example: read_2.fastqc.zip - - name: --unpaired_r1 - type: file - required: false - description: Output file for unpired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: unpaired_read_1.fastq - - name: --unpaired_r2 - type: file - required: false - description: Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. - direction: output - example: unpaired_read_2.fastq - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - -engines: -- type: docker - image: quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0 - setup: - - type: docker - run: | - echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt - -runners: - - type: executable - - type: nextflow \ No newline at end of file diff --git a/src/trimgalore/script.sh b/src/trimgalore/script.sh deleted file mode 100755 index 46413c1..0000000 --- a/src/trimgalore/script.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -[[ ! 
-d $output_dir ]] && mkdir -p $par_output_dir - -IFS=";" read -ra input <<< $par_input - -unset_if_false=( - par_phred33 - par_phred64 - par_fastqc - par_illumina - par_stranded_illumina - par_nextera - par_small_rna - par_gzip - par_dont_gzip - par_no_report_file - par_suppress_warn - par_clock - par_polyA - par_rrbs - par_non_directional - par_keep par_paired - par_retain_unpaired -) - -for par in ${unset_if_false[@]}; do - test_val="${!par}" - [[ "$test_val" == "false" ]] && unset $par -done - -trim_galore \ - ${par_quality:+-q "${par_quality}"} \ - ${par_phred33:+--phred33} \ - ${par_phred64:+--phred64 } \ - ${par_fastqc:+--fastqc } \ - ${par_fastqc_args:+--fastqc_args "${par_fastqc_args}"} \ - ${par_adapter:+-a "${par_adapter}"} \ - ${par_adapter2:+-a2 "${par_adapter2}"} \ - ${par_illumina:+--illumina} \ - ${par_stranded_illumina:+--stranded_illumina} \ - ${par_nextera:+--nextera} \ - ${par_small_rna:+--small_rna} \ - ${par_consider_already_trimmed:+--consider_already_trimmed "${par_consider_already_trimmed}"} \ - ${par_max_length:+--max_length "${par_max_length}"} \ - ${par_stringency:+--stringency "${par_stringency}"} \ - ${par_error_rate:+-e "${par_error_rate}"} \ - ${par_gzip:+--gzip} \ - ${par_dont_gzip:+--dont_gzip} \ - ${par_length:+--length "${par_length}"} \ - ${par_max_n:+--max_n "${par_max_n}"} \ - ${par_trim_n:+--trim-n "${par_trim_n}"} \ - ${par_no_report_file:+--no_report_file} \ - ${par_suppress_warn:+--suppress_warn} \ - ${par_clip_R1:+--clip_R1 "${par_clip_R1}"} \ - ${par_clip_R2:+--clip_R2 "${par_clip_R2}"} \ - ${par_three_prime_clip_R1:+--three_prime_clip_R1 "${par_three_prime_clip_R1}"} \ - ${par_three_prime_clip_R2:+--three_prime_clip_R2 "${par_three_prime_clip_R2}"} \ - ${par_nextseq:+--nextseq "${par_nextseq}"} \ - ${par_basename:+-basename "${par_basename}"} \ - ${par_hardtrim5:+--hardtrim5 "${par_hardtrim5}"} \ - ${par_hardtrim3:+--hardtrim3 "${par_hardtrim3}"} \ - ${par_clock:+--clock} \ - ${par_polyA:+--polyA} \ - 
${par_implicon:+--implicon "${par_implicon}"} \ - ${par_rrbs:+--rrbs} \ - ${par_non_directional:+--non_directional} \ - ${par_keep:+--keep} \ - ${par_paired:+--paired} \ - ${par_retain_unpaired:+--retain_unpaired} \ - ${par_length_1:+-r1 "${par_length_1}"} \ - ${par_length_2:+-r2 "${par_length_2}"} \ - ${par_cores:+-j "${par_cores}"} \ - -o $par_output_dir \ - ${input[*]} - -if [ $par_paired == "true" ]; then - - input_r1=$(basename -- "${input[0]}") - input_r2=$(basename -- "${input[1]}") - [[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*val_1.f*q* $par_trimmed_r1 - [[ ! -z "$par_trimmed_r2" ]] && mv $par_output_dir/*val_2.f*q* $par_trimmed_r2 - [[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1 - [[ ! -z "$par_trimming_report_r2" ]] && mv $par_output_dir/${input_r2}_trimming_report.txt $par_trimming_report_r2 - - if [ "$par_fastqc" == "true" ]; then - [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*val_1_fastqc.html $par_trimmed_fastqc_html_1 - [[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv $par_output_dir/*val_2_fastqc.html $par_trimmed_fastqc_html_2 - [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*val_1_fastqc.zip $par_trimmed_fastqc_zip_1 - [[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv $par_output_dir/*val_2_fastqc.zip $par_trimmed_fastqc_zip_2 - fi - - if [ "$par_retain_unpaired" == "true" ]; then - [[ ! -z "$par_unpaired_r1" ]] && mv $par_output_dir/*.unpaired_1.f*q* $par_unpaired_r1 - [[ ! -z "$par_unpaired_r2" ]] && mv $par_output_dir/*.unpaired_2.f*q* $par_unpaired_r2 - fi - -else - - input_r1=$(basename -- "${input[0]}") - [[ ! -z "$par_trimmed_r1" ]] && mv $par_output_dir/*_trimmed.fq* $par_trimmed_r1 - [[ ! -z "$par_trimming_report_r1" ]] && mv $par_output_dir/${input_r1}_trimming_report.txt $par_trimming_report_r1 - - if [ "$par_fastqc" == "true" ]; then - [[ ! 
-z "$par_trimmed_fastqc_html_1" ]] && mv $par_output_dir/*_trimmed_fastqc.html $par_trimmed_fastqc_html_1 - [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv $par_output_dir/*_trimmed_fastqc.zip $par_trimmed_fastqc_zip_1 - fi - -fi \ No newline at end of file diff --git a/src/trimgalore/test.sh b/src/trimgalore/test.sh deleted file mode 100644 index 4972ce4..0000000 --- a/src/trimgalore/test.sh +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash - -set -e -set -eo pipefail - -# helper functions -assert_file_exists() { - [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } -} -assert_file_doesnt_exist() { - [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } -} -assert_file_empty() { - [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } -} -assert_file_not_empty() { - [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } -} -assert_file_contains() { - grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } -} -assert_file_not_contains() { - grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } -} - -################################################################# - -echo ">>> Prepare test data" - -cat > example_R1.fastq <<'EOF' -@SRR6357071.22842410 22842410/1 kraken:taxid|4932 -CAAGTTTTCATCTTCAACAGCTGATTGACTTCTTTGTGGTATGCCTCGATATATTTTTCTTTTTCTTTAATATCTTTATTATAGGTGATTGCCTCATCGTA -+ -BBBBBFFFFFFFFFFFFFFF/BFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFBF< -@SRR6357071.52260105 52260105/1 kraken:taxid|4932 -TAGACTTACCAGTACCCTTTTCGACGGCGGAAACATTCAAAATACCGTTAGAGTCGACATCGAAAGTGACTTCAATTTGTGGGACACCTCTTGGAGCTGGT -+ -BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFFFFFFFFFFFFF -EOF - -cat > example_R2.fastq <<'EOF' -@SRR6357071.22842410 22842410/2 kraken:taxid|4932 -CCGAGATCGAAGAAACGAATTCACCTGATTGCAGCTGTAAAAGCAGTAAAATCAATCAAACCAATACGGACAACCTTACGATACGATGAGGCAATCACCTA -+ 
-BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SRR6357071.52260105 52260105/2 kraken:taxid|4932 -GTTGATTCCAAGAAACTCTACCATTCCAACTAAGAAATCCGAAGTTTTCTCTACTTATGCTGACAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTG -+ -BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -EOF - -################################################################# - -echo ">>> Testing for single-end reads" -"$meta_executable" \ - --paired false \ - --input "example_R1.fastq" \ - --trimmed_fastqc_html_1 output_se_test/example.trimmed.html \ - --trimmed_fastqc_zip_1 output_se_test/example.trimmed.zip \ - --trimmed_r1 output_se_test/example.trimmed.fastq \ - --trimming_report_r1 output_se_test/example.trimming_report.txt \ - --fastqc true \ - --output_dir output_se_test - -echo ">> Checking output" -assert_file_exists "output_se_test/example.trimmed.html" -assert_file_exists "output_se_test/example.trimmed.zip" -assert_file_exists "output_se_test/example.trimmed.fastq" -assert_file_exists "output_se_test/example.trimming_report.txt" - -echo ">> Check if output is empty" -assert_file_not_empty "output_se_test/example.trimmed.html" -assert_file_not_empty "output_se_test/example.trimmed.zip" -assert_file_not_empty "output_se_test/example.trimmed.fastq" -assert_file_not_empty "output_se_test/example.trimming_report.txt" - -echo ">> Check contents" -assert_file_contains "output_se_test/example.trimmed.fastq" "@SRR6357071.22842410 22842410/1" -assert_file_contains "output_se_test/example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff" - -################################################################# - -echo ">>> Testing for paired-end reads" -"$meta_executable" \ - --paired true \ - --input "example_R1.fastq;example_R2.fastq" \ - --trimmed_fastqc_html_1 output_pe_test/example_R1.trimmed.html \ - --trimmed_fastqc_html_2 output_pe_test/example_R2.trimmed.html \ 
- --trimmed_fastqc_zip_1 output_pe_test/example_R1.trimmed.zip \ - --trimmed_fastqc_zip_2 output_pe_test/example_R2.trimmed.zip \ - --trimmed_r1 output_pe_test/example_R1.trimmed.fastq \ - --trimmed_r2 output_pe_test/example_R2.trimmed.fastq \ - --trimming_report_r1 output_pe_test/example_R1.trimming_report.txt \ - --trimming_report_r2 output_pe_test/example_R2.trimming_report.txt \ - --fastqc true \ - --output_dir output_pe_test - -echo ">> Checking output" -assert_file_exists "output_pe_test/example_R1.trimmed.html" -assert_file_exists "output_pe_test/example_R2.trimmed.html" -assert_file_exists "output_pe_test/example_R1.trimmed.zip" -assert_file_exists "output_pe_test/example_R2.trimmed.zip" -assert_file_exists "output_pe_test/example_R1.trimmed.fastq" -assert_file_exists "output_pe_test/example_R2.trimmed.fastq" -assert_file_exists "output_pe_test/example_R1.trimming_report.txt" -assert_file_exists "output_pe_test/example_R2.trimming_report.txt" - -echo ">> Check if output is empty" -assert_file_not_empty "output_pe_test/example_R1.trimmed.html" -assert_file_not_empty "output_pe_test/example_R2.trimmed.html" -assert_file_not_empty "output_pe_test/example_R1.trimmed.zip" -assert_file_not_empty "output_pe_test/example_R2.trimmed.zip" -assert_file_not_empty "output_pe_test/example_R1.trimmed.fastq" -assert_file_not_empty "output_pe_test/example_R2.trimmed.fastq" -assert_file_not_empty "output_pe_test/example_R1.trimming_report.txt" -assert_file_not_empty "output_pe_test/example_R2.trimming_report.txt" - -echo ">> Check contents" -assert_file_contains "output_pe_test/example_R1.trimmed.fastq" "@SRR6357071.22842410 22842410/1" -assert_file_contains "output_pe_test/example_R2.trimmed.fastq" "@SRR6357071.22842410 22842410/2" -assert_file_contains "output_pe_test/example_R1.trimming_report.txt" "sequences processed in total" -assert_file_contains "output_pe_test/example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter 
than the length cutoff" - -################################################################# - -echo ">>> Test finished successfully" -exit 0 \ No newline at end of file diff --git a/src/tximport/tximport.r b/src/tximport/tximport.r index c47f8e6..5036399 100755 --- a/src/tximport/tximport.r +++ b/src/tximport/tximport.r @@ -137,5 +137,6 @@ if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) done <- lapply(params, write_se_table) # Output session information and citations -citation("tximeta") +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") sessionInfo() \ No newline at end of file diff --git a/src/umitools/umitools_dedup/config.vsh.yaml b/src/umitools/umitools_dedup/config.vsh.yaml deleted file mode 100644 index 51930ba..0000000 --- a/src/umitools/umitools_dedup/config.vsh.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: "umitools_dedup" -namespace: "umitools" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/umitools/dedup/main.nf, modules/nf-core/umitools/dedup/meta.yml] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. - -argument_groups: -- name: "Input" - arguments: - - name: "--paired" - type: boolean - default: false - description: Paired fastq files or not? - - name: "--bam" - type: file - description: Input BAM file - - name: "--bai" - type: file - description: BAM index - - name: "--get_output_stats" - type: boolean - description: Whether or not to generate output stats. 
- -- name: "Output" - arguments: - - name: "--output_bam" - type: file - description: Deduplicated BAM file - direction: output - default: $id.$key.bam - - name: "--output_stats" - type: file - description: Directory containing UMI based dedupllication statistics files - direction: output - default: $id.umi_dedup.stats - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/chr19.bam - - path: /testData/unit_test_resources/chr19.bam.bai - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [pip] - - type: python - packages: [umi_tools] -runners: -- type: executable -- type: nextflow \ No newline at end of file diff --git a/src/umitools/umitools_dedup/script.sh b/src/umitools/umitools_dedup/script.sh deleted file mode 100755 index 39bb9b1..0000000 --- a/src/umitools/umitools_dedup/script.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -args="--random-seed=100" - -if $par_paired; then - paired="--paired" - args+=" --unpaired-reads=discard --chimeric-pairs=discard" -else - paired="" -fi - -if $par_get_output_stats; then - mkdir -p $par_output_stats - stats="--output-stats $par_output_stats/" -else - stats="" -fi - -PYTHONHASHSEED=0 umi_tools dedup -I $par_bam -S $par_output_bam $stats $paired $args diff --git a/src/umitools/umitools_dedup/test.sh b/src/umitools/umitools_dedup/test.sh deleted file mode 100644 index a07fbca..0000000 --- a/src/umitools/umitools_dedup/test.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -"$meta_executable" \ - --paired false \ - --bam $meta_resources_dir/chr19.bam \ - --bai $meta_resources_dir/chr19.bam.bai \ - --get_output_stats true \ - --output_bam chr19.deduped.bam \ - --output_stats chr19.umi_dedup.stats - -echo ">>> Checking whether output exists" -[ ! -f "chr19.deduped.bam" ] && echo "File 'chr19.deduped.bam' does not exist!" && exit 1 -[ ! 
-s "chr19.deduped.bam" ] && echo "File 'chr19.deduped.bam' is empty!" && exit 1 -[ ! -d "chr19.umi_dedup.stats" ] && echo "Directory 'chr19.umi_dedup.stats' does not exist!" && exit 1 -[ -z "$(ls -A 'chr19.umi_dedup.stats')" ] && echo "Directory 'chr19.umi_dedup.stats' is empty!" && exit 1 - -echo "All tests succeeded!" -exit 0 \ No newline at end of file diff --git a/src/umitools/umitools_extract/config.vsh.yaml b/src/umitools/umitools_extract/config.vsh.yaml deleted file mode 100644 index a975b82..0000000 --- a/src/umitools/umitools_extract/config.vsh.yaml +++ /dev/null @@ -1,93 +0,0 @@ -name: "umitools_extract" -namespace: "umitools" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/nf-core/umitools/extract/main.nf, modules/nf-core/umitools/extract/meta.yml] - last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33 -description: | - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information. - This component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name. - This component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place - -argument_groups: -- name: "Input" - arguments: - - name: "--paired" - type: boolean - required: false - default: false - description: Paired fastq files or not? - - name: "--input" - type: file - required: true - multiple: true - multiple_sep: "," - description: Input fastq files, either one or two (paired) - example: sample.fastq - - name: "--bc_pattern" - type: string - description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI." - multiple: true - multiple_sep: "," - -- name: "Output" - arguments: - - name: "--fastq_1" - type: file - required: true - description: Output file for read 1. 
- direction: output - default: $id.$key.read_1.fastq - - name: "--fastq_2" - type: file - required: false - must_exist: false - description: Output file for read 2. - direction: output - default: $id.$key.read_2.fastq - -- name: "Optional arguments" - arguments: - - name: "--umitools_extract_method" - type: "string" - description: UMI pattern to use. - default: string - choices: [ string, regex ] - - name: "--umitools_umi_separator" - type: string - default: "_" - description: The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software. - - name: "--umitools_grouping_method" - type: string - description: Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently. - default: "directional" - choices: ["unique", "percentile", "cluster", "adjacency", "directional"] - - name: "--umi_discard_read" - type: integer - description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. 
- choices: [0, 1, 2] - default: 0 - -resources: - - type: bash_script - path: script.sh - -test_resources: - - type: bash_script - path: test.sh - - path: /testData/unit_test_resources/scrb_seq_fastq.1.gz - - path: /testData/unit_test_resources/scrb_seq_fastq.2.gz - - path: /testData/unit_test_resources/slim.fastq.gz - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [pip] - - type: python - packages: [umi_tools] -runners: -- type: executable -- type: nextflow diff --git a/src/umitools/umitools_extract/script.sh b/src/umitools/umitools_extract/script.sh deleted file mode 100755 index dcf0eac..0000000 --- a/src/umitools/umitools_extract/script.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "$tmpdir" -} -trap clean_up EXIT - -tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") - -IFS="," read -ra input <<< "$par_input" -IFS="," read -ra pattern <<< "$par_bc_pattern" - -read_count="${#input[@]}" -pattern_count="${#pattern[@]}" - -if [ "$par_paired" == "true" ]; then - echo "Paired - Reads: $read_count bc_patterns: $pattern_count" - if [ "$read_count" -ne 2 ] || [ "$pattern_count" -ne 2 ]; then - echo "Paired end input requires two read files and two UMI patterns" - exit 1 - else - read1="$(basename -- ${input[0]})" - read2="$(basename -- ${input[1]})" - umi_tools extract \ - -I "${input[0]}" --read2-in="${input[1]}" \ - -S "$tmpdir/$read1" \ - --read2-out="$tmpdir/$read2" \ - --extract-method $par_umitools_extract_method \ - --bc-pattern "${pattern[0]}" \ - --bc-pattern2 "${pattern[1]}" \ - --umi-separator $par_umitools_umi_separator - if [ $par_umi_discard_read == 1 ]; then - # discard read 1 - cp $tmpdir/$read1 $par_fastq_1 - elif [ $par_umi_discard_read == 2 ]; then - # discard read 2 - cp $tmpdir/$read2 $par_fastq_1 - else - cp $tmpdir/$read1 $par_fastq_1 - cp $tmpdir/$read2 $par_fastq_2 - fi - fi -else - echo "Not Paired - $read_count" - if [ "$read_count" -ne 
1 ] || [ "$pattern_count" -ne 1 ]; then - echo "Single end input requires one read file and one UMI pattern" - exit 1 - else - read1="$(basename -- ${input[0]})" - umi_tools extract \ - -I "${input[0]}" -S "$tmpdir/$read1" \ - --extract-method $par_umitools_extract_method \ - --bc-pattern "${pattern[0]}" \ - --umi-separator $par_umitools_umi_separator - cp $tmpdir/$read1 $par_fastq_1 - fi -fi diff --git a/src/umitools/umitools_extract/test.sh b/src/umitools/umitools_extract/test.sh deleted file mode 100644 index 1f2a392..0000000 --- a/src/umitools/umitools_extract/test.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -echo ">>> Testing $meta_functionality_name" - -echo ">>> Testing for paired-end reads" -"$meta_executable" \ - --paired true \ - --input $meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz \ - --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \ - --umitools_extract_method string \ - --umitools_umi_separator '_' \ - --umitools_grouping_method directional \ - --umi_discard_read 0 \ - --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz \ - --fastq_2 scrb_seq_fastq.2.umi_extract.fastq.gz - -echo ">> Checking if the correct files are present" -[[ ! -f scrb_seq_fastq.1.umi_extract.fastq.gz ]] || [[ ! -f scrb_seq_fastq.2.umi_extract.fastq.gz ]] && echo "Reads file missing" && exit 1 -[ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ] && echo "Read 1 file is empty" && exit 1 -[ ! 
-s "scrb_seq_fastq.2.umi_extract.fastq.gz" ] && echo "Read 2 file is empty" && exit 1 - -rm scrb_seq_fastq.1.umi_extract.fastq.gz scrb_seq_fastq.2.umi_extract.fastq.gz - -echo ">>> Testing for paired-end reads with umi_discard_reads option" -"$meta_executable" \ - --paired true \ - --input $meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz \ - --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \ - --umitools_extract_method string \ - --umitools_umi_separator '_' \ - --umitools_grouping_method directional \ - --umi_discard_read 2 \ - --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz \ - -echo ">> Checking if the correct files are present" -[ ! -f "scrb_seq_fastq.1.umi_extract.fastq.gz" ] && echo "Read 1 file is missing" && exit 1 -[ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ] && echo "Read 1 file is empty" && exit 1 -[ -f "scrb_seq_fastq.2.umi_extract.fastq.gz" ] && echo "Read 2 is not discarded" && exit 1 - -echo ">>> Testing for single-end reads" -"$meta_executable" \ - --paired false \ - --input $meta_resources_dir/slim.fastq.gz \ - --bc_pattern "^(?P.{3}).{4}(?P.{2})" \ - --umitools_extract_method regex \ - --umitools_umi_separator '_' \ - --umitools_grouping_method directional \ - --umi_discard_read 0 \ - --fastq_1 slim.umi_extract.fastq.gz - -echo ">> Checking if the correct files are present" -[ ! -f "slim.umi_extract.fastq.gz" ] && echo "Trimmed reads file missing" && exit 1 -[ ! 
-s "slim.umi_extract.fastq.gz" ] && echo "Trimmed reads file is empty" && exit 1 - -echo ">>> Test finished successfully" -exit 0 diff --git a/src/umitools_prepareforquant/config.vsh.yaml b/src/umitools_prepareforquant/config.vsh.yaml deleted file mode 100644 index f0fb894..0000000 --- a/src/umitools_prepareforquant/config.vsh.yaml +++ /dev/null @@ -1,42 +0,0 @@ -name: "umitools_prepareforquant" -info: - migration_info: - git_repo: https://github.com/nf-core/rnaseq.git - paths: [modules/local/umitools_prepareforrsem.nf] - last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d -description: Fix paired-end reads in name sorted BAM file to prepare for salmon quantification - -argument_groups: -- name: "Input" - arguments: - - name: "--bam" - type: file - -- name: "Output" - arguments: - - name: "--output" - type: file - direction: output - default: $id.transcriptome_sorted.bam - - name: "--log" - type: file - direction: output - default: $id.$key.log - -resources: - - type: bash_script - path: script.sh - # copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/prepare-for-rsem.py - - path: prepare-for-rsem.py - -engines: -- type: docker - image: ubuntu:22.04 - setup: - - type: apt - packages: [pip] - - type: python - packages: [umi_tools, pysam] -runners: -- type: executable -- type: nextflow \ No newline at end of file diff --git a/src/umitools_prepareforquant/script.sh b/src/umitools_prepareforquant/script.sh deleted file mode 100755 index 9ca3d72..0000000 --- a/src/umitools_prepareforquant/script.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -set -eo pipefail - -python3 "$meta_resources_dir/prepare-for-rsem.py" \ - --stdin=$par_bam \ - --stdout=$par_output \ - --log=$par_log diff --git a/src/workflows/genome_alignment_and_quant/config.vsh.yaml b/src/workflows/genome_alignment_and_quant/config.vsh.yaml index 6fefdbb..af72c60 100644 --- a/src/workflows/genome_alignment_and_quant/config.vsh.yaml +++ b/src/workflows/genome_alignment_and_quant/config.vsh.yaml @@ 
-4,173 +4,173 @@ description: | A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline. argument_groups: -- name: "Input" - arguments: - - name: "--id" - required: true - type: string - description: ID of the sample. - example: foo - - name: "--fastq_1" - alternatives: [-i] - type: file - description: Path to the sample (or read 1 of paired end sample). - required: true - example: input.fastq.gz - - name: "--fastq_2" - type: file - required: false - description: Path to read 2 of the sample. - - name: "--strandedness" - type: string - required: false - description: Sample strand-specificity. Must be one of unstranded, forward, or reverse - choices: [forward, reverse, unstranded] - - name: "--gtf" - type: file - description: GTF file - - name: "--transcript_fasta" - type: file - description: Fasta file of the reference transcriptome. - - name: "--star_index" - type: file - description: STAR index directory. - - name: "--star_ignore_sjdbgtf" - type: boolean - default: false - description: When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file - - name: --star_sjdb_gtf_feature_exon - type: string - description: Feature type in GTF file to be used as exons for building transcripts - - name: "--bam_csi_index" - type: boolean - default: false - description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. - - name: "--umi_dedup_stats" - type: boolean - description: Generate output stats when running "umi_tools dedup". - default: false - - name: "--with_umi" - type: boolean - description: Enable UMI-based read deduplication. - default: false - - name: "--salmon_quant_libtype" - type: string - description: Override Salmon library type inferred based on strandedness defined in meta object. 
- - name: "--extra_salmon_quant_args" - type: string - default: '' - description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline. - - name: "--gtf_group_features" - type: string - default: 'gene_id' - description: Define the attribute type used to group features in the GTF file when running Salmon. - - name: "--gtf_extra_attributes" - type: string - default: 'gene_name' - description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon. - - name: --extra_rsem_calculate_expression_args - type: string - description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline. - - name: "--aligner" - type: string - description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'. - choices: [star_salmon, star_rsem, hisat2] - default: "star_salmon" - - name: "--rsem_index" - type: file - description: Path to directory for pre-built RSEM index. - - name: "--salmon_index" - type: file - description: Path to directory for pre-built Salmon index. 
- -- name: "Output" - arguments: - - name: "--star_multiqc" - type: file - direction: output - default: $id_star.log - - name: "--genome_bam_sorted" - type: file - direction: output - default: $id.genome.bam - - name: "--genome_bam_index" - type: file - direction: output - default: $id.genome.bam.bai - - name: "--genome_bam_stats" - type: file - direction: output - default: $id.genome.stats - - name: "--genome_bam_flagstat" - type: file - direction: output - default: $id.genome.flagstat - - name: "--genome_bam_idxstats" - type: file - direction: output - default: $id.genome.idxstats - - name: "--transcriptome_bam" - type: file - direction: output - default: $id.transcriptome.bam - - name: "--transcriptome_bam_index" - type: file - direction: output - default: $id.transcriptome.bam.bai - - name: "--transcriptome_bam_stats" - type: file - direction: output - default: $id.transcriptome.stats - - name: "--transcriptome_bam_flagstat" - type: file - direction: output - default: $id.transcriptome.flagstat - - name: "--transcriptome_bam_idxstats" - type: file - direction: output - default: $id.transcriptome.idxstats - - name: "--quant_out_dir" - type: file - direction: output - default: $id.salmon_quant - - name: "--quant_results_file" - type: file - direction: output - default: $id.quant.sf - - name: "--salmon_multiqc" - type: file - direction: output - - name: "--rsem_counts_gene" - type: file - description: Expression counts on gene level - default: $id.genes.results - direction: output - - name: "--counts_transcripts" - type: file - description: Expression counts on transcript level - default: $id.isoforms.results - direction: output - - name: "--rsem_multiqc" - type: file - description: RSEM statistics - default: $id.stat - direction: output - - name: "--bam_star_rsem" - type: file - description: BAM file generated by STAR (optional) - default: $id.STAR.genome.bam - direction: output - - name: "--bam_genome_rsem" - type: file - description: Genome BAM file (optional) 
- default: $id.genome.bam - direction: output - - name: "--bam_transcript_rsem" - type: file - description: Transcript BAM file (optional) - default: $id.transcript.bam - direction: output + - name: "Input" + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + alternatives: [-i] + type: file + description: Path to the sample (or read 1 of paired end sample). + required: true + example: input.fastq.gz + - name: "--fastq_2" + type: file + required: false + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + description: Sample strand-specificity. Must be one of unstranded, forward, or reverse + choices: [forward, reverse, unstranded] + - name: "--gtf" + type: file + description: GTF file + - name: "--transcript_fasta" + type: file + description: Fasta file of the reference transcriptome. + - name: "--star_index" + type: file + description: STAR index directory. + - name: "--star_ignore_sjdbgtf" + type: boolean + default: false + description: When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file + - name: --star_sjdb_gtf_feature_exon + type: string + description: Feature type in GTF file to be used as exons for building transcripts + - name: "--bam_csi_index" + type: boolean + default: false + description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes. + - name: "--umi_dedup_stats" + type: boolean + description: Generate output stats when running "umi_tools dedup". + default: false + - name: "--with_umi" + type: boolean + description: Enable UMI-based read deduplication. + default: false + - name: "--salmon_quant_libtype" + type: string + description: Override Salmon library type inferred based on strandedness defined in meta object. 
+ - name: "--extra_salmon_quant_args" + type: string + default: '' + description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline. + - name: "--gtf_group_features" + type: string + default: 'gene_id' + description: Define the attribute type used to group features in the GTF file when running Salmon. + - name: "--gtf_extra_attributes" + type: string + default: 'gene_name' + description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon. + - name: --extra_rsem_calculate_expression_args + type: string + description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline. + - name: "--aligner" + type: string + description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'. + choices: [star_salmon, star_rsem, hisat2] + default: "star_salmon" + - name: "--rsem_index" + type: file + description: Path to directory for pre-built RSEM index. + - name: "--salmon_index" + type: file + description: Path to directory for pre-built Salmon index. 
+ + - name: "Output" + arguments: + - name: "--star_multiqc" + type: file + direction: output + default: $id_star.log + - name: "--genome_bam_sorted" + type: file + direction: output + default: $id.genome.bam + - name: "--genome_bam_index" + type: file + direction: output + default: $id.genome.bam.bai + - name: "--genome_bam_stats" + type: file + direction: output + default: $id.genome.stats + - name: "--genome_bam_flagstat" + type: file + direction: output + default: $id.genome.flagstat + - name: "--genome_bam_idxstats" + type: file + direction: output + default: $id.genome.idxstats + - name: "--transcriptome_bam" + type: file + direction: output + default: $id.transcriptome.bam + - name: "--transcriptome_bam_index" + type: file + direction: output + default: $id.transcriptome.bam.bai + - name: "--transcriptome_bam_stats" + type: file + direction: output + default: $id.transcriptome.stats + - name: "--transcriptome_bam_flagstat" + type: file + direction: output + default: $id.transcriptome.flagstat + - name: "--transcriptome_bam_idxstats" + type: file + direction: output + default: $id.transcriptome.idxstats + - name: "--quant_out_dir" + type: file + direction: output + default: $id.salmon_quant + - name: "--quant_results_file" + type: file + direction: output + default: $id.quant.sf + - name: "--salmon_multiqc" + type: file + direction: output + - name: "--rsem_counts_gene" + type: file + description: Expression counts on gene level + default: $id.genes.results + direction: output + - name: "--counts_transcripts" + type: file + description: Expression counts on transcript level + default: $id.isoforms.results + direction: output + - name: "--rsem_multiqc" + type: file + description: RSEM statistics + default: $id.stat + direction: output + - name: "--bam_star_rsem" + type: file + description: BAM file generated by STAR (optional) + default: $id.STAR.genome.bam + direction: output + - name: "--bam_genome_rsem" + type: file + description: Genome BAM file (optional) 
+ default: $id.genome.bam + direction: output + - name: "--bam_transcript_rsem" + type: file + description: Transcript BAM file (optional) + default: $id.transcript.bam + direction: output resources: - type: nextflow_script @@ -190,16 +190,14 @@ dependencies: repository: biobox - name: samtools/samtools_idxstats repository: biobox - - name: umitools/umitools_dedup - # - name: umi_tools/umi_tools_dedup - # repository: biobox - - name: umitools_prepareforquant - # - name: umi_tools/umi_tools_prepareforquant - # repository: biobox + - name: umi_tools/umi_tools_dedup + repository: biobox + - name: umi_tools/umi_tools_prepareforrsem + repository: biobox - name: salmon/salmon_quant repository: biobox - name: rsem/rsem_calculate_expression - # repository: biobox + repository: biobox runners: - type: executable diff --git a/src/workflows/genome_alignment_and_quant/main.nf b/src/workflows/genome_alignment_and_quant/main.nf index e352e8d..15102e6 100644 --- a/src/workflows/genome_alignment_and_quant/main.nf +++ b/src/workflows/genome_alignment_and_quant/main.nf @@ -84,167 +84,169 @@ workflow run_wf { key: "genome_idxstats" ) - // - // Remove duplicate reads from BAM file based on UMIs - // - - // Deduplicate genome BAM file - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "get_output_stats": "umi_dedup_stats" - ], - toState: [ "genome_bam_sorted": "output_bam" ], - key: "genome_deduped" - ) - | samtools_index.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_deduped" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": 
"fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_deduped_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta", - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_deduped_idxstats" - ) + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped" + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: 
{ id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats" + ) // Deduplicate transcriptome BAM file - | samtools_sort.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ "input": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "csi": "bam_csi_index" - ], - toState: [ "transcriptome_bam_index": "output" ], - key: "transcriptome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index", - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_idxstats" - ) - - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index", - "get_output_stats": "umi_dedup_stats", - ], - toState: [ "transcriptome_bam_deduped": "output_bam" ], - key: "transcriptome_deduped" - ) - | samtools_sort.run ( - runIf: { id, 
state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ "input": "transcriptome_bam_deduped" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_deduped_sorted" - ) - | samtools_index.run ( + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted" + ) + | samtools_index.run ( runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, fromState: [ "input": "transcriptome_bam", "csi": "bam_csi_index" ], toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats" + ) + + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? 
state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped" + ) + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], key: "transcriptome_deduped_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_deduped_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_deduped_idxstats" - ) + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + 
fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats" + ) - // Fix paired-end reads in name sorted BAM file - | umitools_prepareforquant.run ( - runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, - fromState: [ "bam": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ] - ) + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run ( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ] + ) // Infer lib-type for salmon quant | map { id, state -> @@ -281,78 +283,91 @@ workflow run_wf { ] ) - | map { id, state -> - def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state - [ id, mod_state ] - } - - | rsem_calculate_expression.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "id": "id", - "strandedness": "strandedness", - "paired": "paired", - "input": "input", - "index": "rsem_index", - "extra_args": "extra_rsem_calculate_expression_args" - ], - toState: [ - "rsem_counts_gene": "counts_gene", - "rsem_counts_transcripts": "counts_transcripts", - "rsem_multiqc": "stat", - "star_multiqc": "logs", - "bam_star_rsem": "bam_star", - "bam_genome_rsem": "bam_genome", - "bam_transcript_rsem": "bam_transcript" - ] - ) - - // RSEM_Star BAM - | samtools_sort.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: ["input": "bam_star_rsem"], - toState: ["genome_bam_sorted": "output"], - key: "genome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_idxstats" - ) + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ] + ) + + // RSEM_Star BAM + | samtools_sort.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted" + ) + | samtools_index.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: 
[ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats" + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/src/workflows/genome_alignment_and_quant/test_run.sh b/src/workflows/genome_alignment_and_quant/test_run.sh index bf674e9..69ef4dc 100755 --- a/src/workflows/genome_alignment_and_quant/test_run.sh +++ b/src/workflows/genome_alignment_and_quant/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -# v;iash ns build --setup cb --parallel +viash ns build --setup cb --parallel # Split error message from standard output # viash ns list > /dev/null @@ -37,7 +37,6 @@ nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \ --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ --rsem_index testData/minimal_test/reference/rsem_index \ --aligner star_rsem \ - --extra_rsem_calculate_expression_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \ -profile docker \ -resume diff --git a/src/workflows/post_processing/config.vsh.yaml b/src/workflows/post_processing/config.vsh.yaml index 40ca733..6a6d539 100644 --- a/src/workflows/post_processing/config.vsh.yaml +++ b/src/workflows/post_processing/config.vsh.yaml @@ -50,10 +50,6 @@ argument_groups: - name: "--stringtie_ignore_gtf" type: boolean description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file. - - name: "--extra_bedtools_args" - type: string - default: '' - description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline. 
- name: "--bam_csi_index" type: boolean default: false @@ -151,9 +147,8 @@ dependencies: - name: samtools/samtools_idxstats repository: biobox - name: stringtie - # - name: bedtools/bedtools_genomecov - # repository: biobox - - name: bedtools_genomecov + - name: bedtools/bedtools_genomecov + repository: biobox - name: ucsc/bedclip - name: ucsc/bedgraphtobigwig diff --git a/src/workflows/post_processing/main.nf b/src/workflows/post_processing/main.nf index 640b97b..d7e15f8 100644 --- a/src/workflows/post_processing/main.nf +++ b/src/workflows/post_processing/main.nf @@ -81,18 +81,35 @@ workflow run_wf { // Genome-wide coverage with BEDTools - | bedtools_genomecov.run ( - runIf: { id, state -> !state.skip_bigwig }, - fromState: [ - "strandedness": "strandedness", - "bam": "processed_genome_bam", - "extra_bedtools_args": "extra_bedtools_args" - ], - toState: [ - "bedgraph_forward": "bedgraph_forward", - "bedgraph_reverse": "bedgraph_reverse" - ] - ) + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward" + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse" + ) | bedclip.run ( runIf: { id, state -> !state.skip_bigwig }, diff --git a/src/workflows/post_processing/test_run.sh b/src/workflows/post_processing/test_run.sh index d62aee2..09595d5 100755 --- a/src/workflows/post_processing/test_run.sh +++ b/src/workflows/post_processing/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -# viash ns build --setup cb --parallel +viash ns build --setup cb --parallel nextflow run 
target/nextflow/workflows/post_processing/main.nf \ --publish_dir "testData/paired_end_test" \ @@ -14,8 +14,6 @@ nextflow run target/nextflow/workflows/post_processing/main.nf \ --chrom_sizes "testData/test_output/reference_genome.fasta.sizes" \ --star_multiqc "testData/paired_end_test/SRR6357070.star_align.log" \ --extra_picard_args "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" \ - --extra_bedtools_args "-split -du" \ - --extra_featurecounts_args "-B -C" \ --gencode false \ --biotype gene_biotype \ -profile docker \ diff --git a/src/workflows/pre_processing/config.vsh.yaml b/src/workflows/pre_processing/config.vsh.yaml index b26fde6..fadf259 100644 --- a/src/workflows/pre_processing/config.vsh.yaml +++ b/src/workflows/pre_processing/config.vsh.yaml @@ -29,9 +29,6 @@ argument_groups: - name: "--bbsplit_index" type: file description: BBsplit index - - name: "--bbsplit_fasta_list" - type: file - description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s) - name: "--ribo_database_manifest" type: file description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. @@ -110,12 +107,6 @@ argument_groups: description: Specify the trimming tool to use. choices: [ "trimgalore", "fastp"] default: "trimgalore" - - name: "--extra_trimgalore_args" - type: string - description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline. - # - name: "--extra_fastp_args" - # type: string - # description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline. 
- name: "--min_trimmed_reads" type: integer description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low. @@ -129,31 +120,15 @@ argument_groups: description: Save the trimmed FastQ files in the results directory. default: false - - name: "Alignment options" - arguments: - - name: "--extra_salmon_quant_args" - type: string - default: '' - description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline. - - name: "Read filtering options" arguments: - name: "--skip_bbsplit" type: boolean_true description: Skip BBSplit for removal of non-reference genome reads. - # default: true - name: "--remove_ribo_rna" type: boolean_true description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA. - # default: false - - name: "Other options" - arguments: - - name: "--extra_fq_subsample_args" - type: string - default: '--record-count 1000000 --seed 1' - description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline. - - name: "Output" arguments: - name: "--qc_output1" @@ -162,78 +137,78 @@ argument_groups: required: false must_exist: false description: Path to output directory - default: $id.read_1.fastq + default: ${id}_r1.fastq.gz - name: "--qc_output2" type: file direction: output required: false must_exist: false description: Path to output directory - default: $id.read_2.fastq + default: ${id}_r2.fastq.gz - name: "--fastqc_html_1" type: file direction: output description: FastQC HTML report for read 1. required: false must_exist: false - default: $id.read_1.fastqc.html + default: ${id}_r1.fastqc.html - name: "--fastqc_html_2" type: file direction: output description: FastQC HTML report for read 2. 
required: false must_exist: false - default: $id.read_2.fastqc.html + default: ${id}_r2.fastqc.html - name: "--fastqc_zip_1" type: file direction: output description: FastQC report archive for read 1. required: false must_exist: false - default: $id.read_1.fastqc.zip + default: ${id}_r1.fastqc.zip - name: "--fastqc_zip_2" type: file direction: output description: FastQC report archive for read 2. required: false must_exist: false - default: $id.read_2.fastqc.zip + default: ${id}_r2.fastqc.zip - name: "--trim_log_1" type: file direction: output required: false must_exist: false - default: $id.read_1.trimming_report.txt + default: ${id}_r1.trimming_report.txt - name: "--trim_log_2" type: file direction: output required: false must_exist: false - default: $id.read_2.trimming_report.txt + default: ${id}_r2.trimming_report.txt - name: "--trim_html_1" type: file direction: output required: false must_exist: false - default: $id.read_1.trimmed_fastqc.html + default: ${id}_r1.trimmed_fastqc.html - name: "--trim_html_2" type: file direction: output required: false must_exist: false - default: $id.read_2.trimmed_fastqc.html + default: ${id}_r2.trimmed_fastqc.html - name: "--trim_zip_1" type: file direction: output required: false must_exist: false - default: $id.read_1.trimmed_fastqc.zip + default: ${id}_r1.trimmed_fastqc.zip - name: "--trim_zip_2" type: file direction: output required: false must_exist: false - default: $id.read_2.trimmed_fastqc.zip + default: ${id}_r2.trimmed_fastqc.zip - name: "--sortmerna_log" type: file direction: output @@ -267,20 +242,19 @@ resources: dependencies: - name: fastqc - # repository: biobox - - name: umitools/umitools_extract + repository: biobox - name: umi_tools/umi_tools_extract repository: biobox - name: trimgalore - # repository: biobox - - name: bbmap_bbsplit - # repository: biobox + repository: biobox + - name: bbmap/bbmap_bbsplit + repository: biobox - name: sortmerna - # repository: biobox + repository: biobox - name: fastp 
repository: biobox - name: fq_subsample - # repository: biobox + repository: biobox - name: salmon/salmon_quant repository: biobox diff --git a/src/workflows/pre_processing/main.nf b/src/workflows/pre_processing/main.nf index 55ce1b1..8c451c8 100644 --- a/src/workflows/pre_processing/main.nf +++ b/src/workflows/pre_processing/main.nf @@ -12,48 +12,58 @@ workflow run_wf { [ id, state + [paired: paired, input: input] ] } - // Perform QC on input fastq files | fastqc.run ( runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "fastqc_html_1": "fastqc_html_1", - "fastqc_html_2": "fastqc_html_2", - "fastqc_zip_1": "fastqc_zip_1", - "fastqc_zip_2": "fastqc_zip_2" - ] + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"] ) // Extract UMIs from fastq files and discard read 1 or read 2 if required - | umitools_extract.run ( + | umi_tools_extract.run ( runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - def bc_pattern = state.paired ? [ state.umitools_bc_pattern, state.umitools_bc_pattern2 ] : [ state.umitools_bc_pattern ] - [ paired: state.paired, - input: input, - bc_pattern: bc_pattern, - umi_discard_read: state.umi_discard_read ] + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "${id}.r1.fastq.gz" + def read2_out = state.paired ? 
"${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] }, toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2" + "fastq_1": "output", + "fastq_2": "read2_out" ] ) // Discard read if required | map { id, state -> def paired = state.paired + def fastq_1 = state.fastq_1 def fastq_2 = state.fastq_2 if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { - fastq_2 = state.remove(state.fastq_2) + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) paired = false } - [ id, state + [paired: paired, fastq_2: fastq_2] ] + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] } // Trim reads using Trim galore! @@ -63,8 +73,11 @@ workflow run_wf { def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - min_trimmed_reads: state.min_trimmed_reads ] + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] }, + args: [gzip: true, fastqc: true], toState: [ "fastq_1": "trimmed_r1", "fastq_2": "trimmed_r2", @@ -74,21 +87,22 @@ workflow run_wf { "trim_zip_2": "trimmed_fastqc_zip_2", "trim_html_1": "trimmed_fastqc_html_1", "trim_html_2": "trimmed_fastqc_html_2" - ], - args: [gzip: true, fastqc: true] + ] ) // Trim reads using fastp | fastp.run( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, - fromState: [ - "in1": "fastq_1", - "in2": "fastq_2", - "merge": "fastp_save_merged", - "interleaved_in": "interleaved_reads", - "detect_adapter_for_pe": "fastp_pe_detect_adapter", - "adapter_fasta": "fastp_adapter_fasta" - ], + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, toState: [ "fastq_1": "out1", "fastq_2": "out2", @@ -102,19 +116,23 @@ workflow run_wf { ) // Perform FASTQC on reads trimmed using fastp - | fastqc.run( + | fastqc.run ( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, fromState: { id, state -> def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "trim_html_1": "fastqc_html_1", - "trim_html_2": "fastqc_html_2", - "trim_zip_1": "fastqc_zip_1", - "trim_zip_2": "fastqc_zip_2" - ], + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], key: "fastqc_trimming" ) @@ -125,7 +143,7 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - built_bbsplit_index: state.bbsplit_index ] + build: state.bbsplit_index ] }, args: ["only_build_index": false], toState: [ @@ -141,27 +159,44 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] def filePaths = state.ribo_database_manifest.readLines() def refs = filePaths.collect { it } - [ paired: state.paired, + def other = "${id}_non_rRNA_reads/" + [ paired_in: state.paired, input: input, - ribo_database_manifest: refs ] + ref: refs, + out2: state.paired, + other: other ] }, - toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2", - "sortmerna_log": "sortmerna_log" - ] + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } // Sub-sample FastQ files 
and pseudo-align with Salmon to auto-infer strandedness | fq_subsample.run ( runIf: { id, state -> state.strandedness == 'auto' }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ - input: input, - extra_args: state.extra_fq_subsample_args - ] + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState }, + args: [ + record_count: 1000, + seed: 1 + ], toState: [ "subsampled_fastq_1": "output_1", "subsampled_fastq_2": "output_2" @@ -187,6 +222,7 @@ workflow run_wf { ) [ id, state + [lib_type: lib_type] ] } + | salmon_quant.run ( runIf: { id, state -> state.strandedness == 'auto' }, fromState: { id, state -> @@ -204,17 +240,17 @@ workflow run_wf { toState: [ "salmon_quant_output": "output" ] ) - | map { id, state -> - def mod_state = (!state.paired) ? - [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : - [] - [ id, state + mod_state ] - } + | map { id, state -> + def mod_state = (!state.paired) ? 
+ [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } - | map { id, state -> - def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } - [ id, mod_state ] - } + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } | setState ( "fastqc_html_1": "fastqc_html_1", @@ -230,9 +266,6 @@ workflow run_wf { "trim_html_1": "trim_html_1", "trim_html_2": "trim_html_2", "sortmerna_log": "sortmerna_log", - "failed_trim": "failed_trim", - "failed_trim_unpaired1": "failed_trim_unpaired1", - "failed_trim_unpaired2": "failed_trim_unpaired2", "trim_json": "trim_json", "trim_html": "trim_html", "trim_merged_out": "trim_merged_out", diff --git a/src/workflows/pre_processing/test_run.sh b/src/workflows/pre_processing/test_run.sh index 0fcd6a7..14de080 100755 --- a/src/workflows/pre_processing/test_run.sh +++ b/src/workflows/pre_processing/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -viash ns build --parallel --setup cb +viash ns build --parallel --setup cb #-q pre_processing echo "> Preparing reference data files" gunzip --keep testData/minimal_test/reference/genes.gtf.gz @@ -24,10 +24,11 @@ nextflow run target/nextflow/workflows/pre_processing/main.nf \ --salmon_index testData/minimal_test/reference/salmon_index \ --skip_trimming false \ --trimmer trimgalore \ - --remove_ribo_rna false \ - --ribo_database_manifest src/assets/rrna-db-defaults.txt \ + --remove_ribo_rna true \ + --ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \ --skip_bbsplit true \ --bbsplit_index test_results/prepare_genome_test1/BBSplit_index \ + --with_umi false \ -profile docker \ -resume diff --git a/src/workflows/prepare_genome/config.vsh.yaml 
b/src/workflows/prepare_genome/config.vsh.yaml index dfd609b..e47425f 100644 --- a/src/workflows/prepare_genome/config.vsh.yaml +++ b/src/workflows/prepare_genome/config.vsh.yaml @@ -35,7 +35,8 @@ argument_groups: description: Skip BBSplit for removal of non-reference genome reads. - name: "--bbsplit_fasta_list" type: file - description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s) + description: List of reference genomes (separated by ";") to filter reads against with BBSplit. + multiple: true - name: "--star_index" type: file description: Path to directory or tar.gz archive for pre-built STAR index. @@ -45,18 +46,12 @@ argument_groups: - name: "--rsem_index" type: file description: Path to directory or tar.gz archive for pre-built RSEM index. - - name: extra_rsem_prepare_reference_args - type: string - description: Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline. - name: "--salmon_index" type: file description: Path to directory or tar.gz archive for pre-built Salmon index. - name: "--kallisto_index" type: file description: Path to directory or tar.gz archive for pre-built Kallisto index. - # - name: "--hisat2_index" - # type: file - # description: Path to directory or tar.gz archive for pre-built HISAT2 index. - name: "--bbsplit_index" type: file description: Path to directory or tar.gz archive for pre-built BBSplit index. @@ -125,10 +120,6 @@ argument_groups: direction: output description: Path to Kallisto index. default: Kallisto_index - # - name: "--hisat2_index_uncompressed" - # type: file - # direction: output - # description: Path to directory or tar.gz archive for pre-built HISAT2 index. 
- name: "--bbsplit_index_uncompressed" type: file direction: output @@ -165,11 +156,12 @@ dependencies: repository: craftbox - name: star/star_genome_generate repository: biobox - - name: bbmap_bbsplit + - name: bbmap/bbmap_bbsplit + repository: biobox - name: salmon/salmon_index repository: biobox - name: kallisto/kallisto_index - # repository: biobox + repository: biobox runners: - type: executable diff --git a/src/workflows/prepare_genome/main.nf b/src/workflows/prepare_genome/main.nf index 3791868..92ed037 100644 --- a/src/workflows/prepare_genome/main.nf +++ b/src/workflows/prepare_genome/main.nf @@ -138,43 +138,45 @@ workflow run_wf { [ id, state + [transcript_fasta: transcript_fasta] ] } - // chromosome size and fai index - | getchromsizes.run ( - fromState: [ "fasta": "fasta" ], - toState: [ - "fai": "fai", - "sizes": "sizes" - ], - key: "chromsizes", - args: [ - fai: "genome_additional.fasta.fai", - sizes: "genome_additional.fasta.sizes" - ] - ) - - // untar bbsplit index, if available - | untar.run ( - runIf: {id, state -> state.bbsplit_index}, - fromState: [ "input": "bbsplit_index" ], - toState: [ "bbsplit_index": "output" ], - key: "untar_bbsplit_index", - args: [output: "BBSplit_index"] - ) - - // create bbsplit index, if not already availble - | bbmap_bbsplit.run ( - runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, - fromState: [ - "primary_ref": "fasta", - "bbsplit_fasta_list": "bbsplit_fasta_list" - ], - toState: [ "bbsplit_index": "bbsplit_index" ], - args: [ - only_build_index: true, - bbsplit_index: "BBSplit_index" - ], - key: "generate_bbsplit_index" - ) + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: 
[ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + args: [output: "BBSplit_index"] + ) + + | map {id, state -> + def ref = [state.fasta] + (state.bbsplit_fasta_list ?: []) // list is optional; avoid appending a null entry + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already available + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) // Uncompress STAR index or generate from scratch if required | untar.run ( @@ -251,16 +253,16 @@ workflow run_wf { args: [output: "Kallisto_index"] ) - | kallisto_index.run( - runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, - fromState: [ - "transcriptome_fasta": "transcript_fasta", - "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size" - ], - toState: [ "kallisto_index": "kallisto_index" ], - key: "generate_kallisto_index", - args: [kallisto_index: "Kallisto_index"] - ) + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"] + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/src/workflows/prepare_genome/test_run.sh b/src/workflows/prepare_genome/test_run.sh index 4c274cd..7a286c2 100755 --- a/src/workflows/prepare_genome/test_run.sh +++ b/src/workflows/prepare_genome/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -# viash ns build --setup cb --parallel -q prepare_genome +viash ns build --setup cb --parallel # echo "Test 1: Annotation file format - GTF" # nextflow run target/nextflow/workflows/prepare_genome/main.nf \ @@
-12,7 +12,7 @@ # --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ # --genotype false \ # --biotype gene_biotype \ -# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ # --salmon_index testData/minimal_test/reference/salmon.tar.gz \ # --rsem_index testData/minimal_test/reference/rsem.tar.gz \ # -profile docker \ @@ -28,7 +28,7 @@ # --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ # --genotype false \ # --biotype gene_biotype \ -# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ +# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ # --salmon_index testData/minimal_test/reference/salmon.tar.gz \ # --rsem_index testData/minimal_test/reference/rsem.tar.gz \ # -profile docker \ @@ -43,7 +43,7 @@ nextflow run target/nextflow/workflows/prepare_genome/main.nf \ --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ --genotype false \ --biotype gene_biotype \ - --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ + --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ --pseudo_aligner kallisto \ --aligner star_rsem \ -profile docker \ diff --git a/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml index 976bc06..8a51f4b 100644 --- a/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml +++ b/src/workflows/pseudo_alignment_and_quant/config.vsh.yaml @@ -4,73 +4,73 @@ description: | A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline. 
argument_groups: -- name: "Input" - arguments: - - name: "--id" - required: true - type: string - description: ID of the sample. - example: foo - - name: "--fastq_1" - alternatives: [-i] - type: file - description: Path to the sample (or read 1 of paired end sample). - required: true - example: input.fastq.gz - - name: "--fastq_2" - type: file - required: false - description: Path to read 2 of the sample. - - name: "--strandedness" - type: string - required: false - description: Sample strand-specificity. Must be one of unstranded, forward, or reverse - choices: [forward, reverse, unstranded] - - name: "--gtf" - type: file - description: GTF file - - name: "--transcript_fasta" - type: file - description: Fasta file of the reference transcriptome. - - name: "--pseudo_aligner" - type: string - default: false - description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'. - choices: [salmon, kallisto] - - name: "--salmon_index" - type: file - description: Salmon index - - name: "--kallisto_index" - type: file - description: Kallisto index - - name: "--lib_type" - type: string - description: Override library type inferred based on strandedness defined in meta object - default: '' - - name: "--kallisto_quant_fragment_length" - type: integer - description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto. - - name: "--kallisto_quant_fragment_length_sd" - type: integer - description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto. 
- -- name: "Output" - arguments: - - name: "--pseudo_multiqc" - type: file - direction: output - - name: "--quant_out_dir" - type: file - direction: output - default: $id.quant - - name: "--salmon_quant_results_file" - type: file - direction: output - default: $id.quant.sf - - name: "--kallisto_quant_results_file" - type: file - direction: output - default: $id.abundance.tsv + - name: "Input" + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--fastq_1" + alternatives: [-i] + type: file + description: Path to the sample (or read 1 of paired end sample). + required: true + example: input.fastq.gz + - name: "--fastq_2" + type: file + required: false + description: Path to read 2 of the sample. + - name: "--strandedness" + type: string + required: false + description: Sample strand-specificity. Must be one of unstranded, forward, or reverse + choices: [forward, reverse, unstranded] + - name: "--gtf" + type: file + description: GTF file + - name: "--transcript_fasta" + type: file + description: Fasta file of the reference transcriptome. + - name: "--pseudo_aligner" + type: string + default: false + description: Specifies the pseudo aligner to use - available options are 'salmon' and 'kallisto'. Runs in addition to '--aligner'. + choices: [salmon, kallisto] + - name: "--salmon_index" + type: file + description: Salmon index + - name: "--kallisto_index" + type: file + description: Kallisto index + - name: "--lib_type" + type: string + description: Override library type inferred based on strandedness defined in meta object + default: '' + - name: "--kallisto_quant_fragment_length" + type: double + description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto. + - name: "--kallisto_quant_fragment_length_sd" + type: double + description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
+ + - name: "Output" + arguments: + - name: "--pseudo_multiqc" + type: file + direction: output + - name: "--quant_out_dir" + type: file + direction: output + default: $id.quant + - name: "--salmon_quant_results_file" + type: file + direction: output + default: $id.quant.sf + - name: "--kallisto_quant_results_file" + type: file + direction: output + default: $id.abundance.tsv resources: - type: nextflow_script @@ -81,6 +81,7 @@ dependencies: - name: salmon/salmon_quant repository: biobox - name: kallisto/kallisto_quant + repository: biobox runners: - type: executable diff --git a/src/workflows/pseudo_alignment_and_quant/main.nf b/src/workflows/pseudo_alignment_and_quant/main.nf index 2f05133..82f1aa9 100644 --- a/src/workflows/pseudo_alignment_and_quant/main.nf +++ b/src/workflows/pseudo_alignment_and_quant/main.nf @@ -57,22 +57,32 @@ workflow run_wf { [ id, mod_state ] } - | kallisto_quant.run ( - runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, - fromState: [ - "input": "input", - "paired": "paired", - "gtf": "gtf", - "index": "kallisto_index", - "fragment_length": "kallisto_quant_fragment_length", - "fragment_length_sd": "kallisto_quant_fragment_length_sd" - ], - toState: [ - "quant_out_dir": "output", - "kallisto_quant_results_file": "quant_results_file", - "pseudo_multiqc": "log" + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, ] - ) + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def newKeys = [ // name must match the 'state + newKeys' merge that follows + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file": output_state["output_dir"] +
"/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/src/workflows/pseudo_alignment_and_quant/test_run.sh b/src/workflows/pseudo_alignment_and_quant/test_run.sh index cdc2bd0..3beb721 100755 --- a/src/workflows/pseudo_alignment_and_quant/test_run.sh +++ b/src/workflows/pseudo_alignment_and_quant/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -# viash ns build --setup cb -q pseudo_alignment_and_quant +viash ns build --setup cb --parallel #-q pseudo_alignment_and_quant # Split error message from standard output # viash ns list > /dev/null @@ -16,30 +16,32 @@ WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse HERE -echo "> Test 1: Salmon qunatification" -nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \ - --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ - --publish_dir "test_results/pseudo_alignment_test1" \ - --fasta testData/minimal_test/reference/genome.fasta \ - --gtf testData/minimal_test/reference/genes.gtf.gz \ - --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ - --salmon_index testData/minimal_test/reference/salmon_index \ - --pseudo_aligner salmon \ - -profile docker \ - -resume - -# echo "> Test 2: Kallisto qunatification" +# echo "> Test 1: Salmon qunatification" # nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \ # --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ -# --publish_dir "test_results/pseudo_alignment_test2" \ +# --publish_dir "test_results/pseudo_alignment_test1" \ # --fasta testData/minimal_test/reference/genome.fasta \ # --gtf testData/minimal_test/reference/genes.gtf.gz \ # --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ -# --kallisto_index 
test_results/prepare_genome_test3/Kallisto_index \ -# --pseudo_aligner kallisto \ +# --salmon_index testData/minimal_test/reference/salmon_index \ +# --pseudo_aligner salmon \ # -profile docker \ # -resume +echo "> Test 2: Kallisto qunatification" +nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \ + --param_list testData/minimal_test/input_fastq/sample_sheet.csv \ + --publish_dir "test_results/pseudo_alignment_test2" \ + --fasta testData/minimal_test/reference/genome.fasta \ + --gtf testData/minimal_test/reference/genes.gtf.gz \ + --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ + --kallisto_index test_results/prepare_genome_test3/Kallisto_index \ + --pseudo_aligner kallisto \ + --kallisto_quant_fragment_length 101.0 \ + --kallisto_quant_fragment_length_sd 50.0 \ + -profile docker \ + -resume + echo "Removing reference data files" rm testData/minimal_test/reference/genes.gtf rm -r testData/minimal_test/reference/salmon_index diff --git a/src/workflows/quality_control/config.vsh.yaml b/src/workflows/quality_control/config.vsh.yaml index 0755c04..97ade9e 100644 --- a/src/workflows/quality_control/config.vsh.yaml +++ b/src/workflows/quality_control/config.vsh.yaml @@ -112,9 +112,6 @@ argument_groups: - name: "--biotype" type: string description: Biotype value to use while appending entries to GTF file when additional fasta file is provided. - - name: "--extra_featurecounts_args" - type: string - description: Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline # RSeQC - name: "--rseqc_modules" @@ -207,12 +204,6 @@ argument_groups: description: Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads. 
# Qualimap - - name: "--output_format" - type: string - required: false - default: html - choices: [ html, pdf ] - description: Format of the qualimap output report (PDF or HTML, default is HTML) - name: "--pr_bases" type: integer required: false @@ -291,8 +282,6 @@ argument_groups: - name: "--star_multiqc" type: file must_exist: false - # - name: "--hisat2_multiqc" - # type: file - name: "--rsem_multiqc" type: file - name: "--genome_bam_stats" @@ -503,17 +492,20 @@ argument_groups: default: $id.intercept_slope.txt # Qualimap - - name: "--qualimap_output_pdf" + - name: "--qualimap_qc_report" + direction: output + type: file + example: $id.rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--qualimap_counts" type: file direction: output - required: false - must_exist: false - default: $id.qualimap_output.pdf - - name: "--qualimap_output_dir" + description: Output file for computed counts. + - name: "--qualimap_report" type: file direction: output - required: false - default: $id.qualimap_output + example: $id.report.html + description: Report output file. Supported formats are PDF or HTML. 
# DESeq2 - name: "--deseq2_output" @@ -626,17 +618,19 @@ resources: dependencies: - name: rseqc/rseqc_bamstat + repository: biobox - name: rseqc/rseqc_inferexperiment - - name: rseqc/rseqc_innerdistance + repository: biobox + - name: rseqc/rseqc_inner_distance + repository: biobox - name: rseqc/rseqc_junctionannotation - name: rseqc/rseqc_junctionsaturation - name: rseqc/rseqc_readdistribution - name: rseqc/rseqc_readduplication - name: rseqc/rseqc_tin - name: dupradar - - name: qualimap - # - name: qualimap/qualimap_rnaseq - # repository: biobox + - name: qualimap/qualimap_rnaseq + repository: biobox - name: preseq_lcextrap - name: featurecounts repository: biobox @@ -645,7 +639,7 @@ dependencies: - name: prepare_multiqc_input - name: multiqc repository: biobox - - name: rsem/rsem_merge_counts + - name: rsem_merge_counts - name: workflows/merge_quant_results runners: diff --git a/src/workflows/quality_control/main.nf b/src/workflows/quality_control/main.nf index 6eca35a..d90cc07 100644 --- a/src/workflows/quality_control/main.nf +++ b/src/workflows/quality_control/main.nf @@ -39,145 +39,145 @@ workflow run_wf { ] ) - | multiqc_custom_biotype.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, - fromState: [ - "id": "id", - "biocounts": "featurecounts", - "biotypes_header": "biotypes_header" - ], - toState: [ - "featurecounts_multiqc": "featurecounts_multiqc", - "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" - ] - ) - - | preseq_lcextrap.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, - fromState: [ - "paired": "paired", - "input": "genome_bam", - "extra_preseq_args": "extra_preseq_args" - ], - toState: [ "preseq_output": "output" ] - ) - - | rseqc_bamstat.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - 
"map_qual": "map_qual" - ], - toState: [ "bamstat_output": "output" ] - ) - | rseqc_inferexperiment.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual" - ], - toState: [ "strandedness_output": "output" ] - ) - // Get predicted strandedness from the RSeQC infer_experiment.py output - | map { id, state -> - def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) - def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true - [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] - } - | rseqc_innerdistance.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, - key: "inner_distance", - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual", - "lower_bound_size": "lower_bound_size", - "upper_bound_size": "upper_bound_size", - "step_size": "step_size" - ], - toState: [ - "inner_dist_output_stats": "output_stats", - "inner_dist_output_dist": "output_dist", - "inner_dist_output_freq": "output_freq", - "inner_dist_output_plot": "output_plot", - "inner_dist_output_plot_r": "output_plot_r" - ] - ) - | rseqc_junctionannotation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "map_qual": "map_qual", - "min_intron": "min_intron" - ], - toState: [ - "junction_annotation_output_log": "output_log", - "junction_annotation_output_plot_r": "output_plot_r", - "junction_annotation_output_junction_bed": "output_junction_bed", - "junction_annotation_output_junction_interact": 
"output_junction_interact", - "junction_annotation_output_junction_sheet": "output_junction_sheet", - "junction_annotation_output_splice_events_plot": "output_splice_events_plot", - "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" - ] - ) - | rseqc_junctionsaturation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", - "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", - "sampling_percentile_step": "sampling_percentile_step", - "min_intron": "min_intron", - "min_splice_read": "min_splice_read", - "map_qual": "map_qual" - ], - toState: [ - "junction_saturation_output_plot_r": "output_plot_r", - "junction_saturation_output_plot": "output_plot" - ] - ) - | rseqc_readdistribution.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - ], - toState: [ "read_distribution_output": "output" ] - ) - | rseqc_readduplication.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "read_count_upper_limit": "read_count_upper_limit", - "map_qual": "map_qual" - ], - toState: [ - "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", - "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", - "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", - "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" - ] - ) - | rseqc_tin.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && 
!state.skip_align }, - fromState: [ - "bam_input": "genome_bam", - "bai_input": "genome_bam_index", - "refgene": "gene_bed", - "minimum_coverage": "minimum_coverage", - "sample_size": "tin_sample_size", - "subtract_background": "subtract_background" - ], - toState: [ - "tin_output_summary": "output_tin_summary", - "tin_output_metrics": "output_tin" - ] - ) + | multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? 
false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size" + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": 
"sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ] + ) | dupradar.run( runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, @@ -199,23 +199,25 @@ workflow run_wf { ] ) - | qualimap.run( - runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "gtf": "gtf", - "pr_bases": 
"pr_bases", - "tr_bias": "tr_bias", - "algorithm": "algorithm", - "sequencing_protocol": "sequencing_protocol", - "sorted": "sorted", - "java_memory_size": "java_memory_size", - ], - toState: [ - "qualimap_output_pdf": "output_pdf", - "qualimap_output_dir": "output_dir" - ] - ) + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ] + ) merged_ch = qc_ch | toSortedList @@ -338,10 +340,10 @@ workflow run_wf { (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? state.preseq_output : null } - def qualimap_output_dir = list.collect { id, state -> - (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? - state.qualimap_output_dir : - null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } def dupradar_output_dup_intercept_mqc = list.collect { id, state -> (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? 
state.dupradar_output_dup_intercept_mqc : @@ -426,7 +428,7 @@ workflow run_wf { featurecounts_multiqc: featurecounts_multiqc, featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, preseq_output: preseq_output, - qualimap_output_dir: qualimap_output_dir, + // qualimap_output_dir: qualimap_output_dir, dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, bamstat_output: bamstat_output, @@ -605,7 +607,7 @@ workflow run_wf { "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", "preseq_multiqc": "preseq_output", - "qualimap_multiqc": "qualimap_output_dir", + // "qualimap_multiqc": "qualimap_output_dir", "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "bamstat_multiqc": "bamstat_output", @@ -705,8 +707,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", diff --git a/src/workflows/rnaseq/config.vsh.yaml b/src/workflows/rnaseq/config.vsh.yaml index e7704f2..4d4f4df 100644 --- a/src/workflows/rnaseq/config.vsh.yaml +++ b/src/workflows/rnaseq/config.vsh.yaml @@ -70,13 +70,6 @@ argument_groups: - name: "--kallisto_index" type: file description: Path to directory or tar.gz archive for pre-built Kallisto index. 
- # - name: "--hisat2_index" - # type: file - # description: Path to directory or tar.gz archive for pre-built HISAT2 index. - # - name: "--hisat2_build_memory" - # type: string - # description: Minimum memory required to use splice sites and exons in the HiSAT2 index build process. - # default: 200.GB - name: "--gencode" type: boolean_true description: Specify if the GTF annotation is in GENCODE format. @@ -107,12 +100,6 @@ argument_groups: description: Specify the trimming tool to use. choices: ["trimgalore", "fastp"] default: "trimgalore" - - name: "--extra_trimgalore_args" - type: string - description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline. - - name: "--extra_fastp_args" - type: string - description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline. - name: "--min_trimmed_reads" type: integer description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low. @@ -122,7 +109,8 @@ argument_groups: arguments: - name: "--bbsplit_fasta_list" type: file - description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s) + description: List of reference genomes (separated by ";") to filter reads against with BBSplit. + multiple: true - name: "--bbsplit_index" type: file description: Path to directory or tar.gz archive for pre-built BBSplit index. @@ -185,10 +173,10 @@ argument_groups: description: Kmer length passed to indexing step of pseudoaligners. 
default: 31 - name: "--kallisto_quant_fragment_length" - type: integer + type: double description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto. - name: "--kallisto_quant_fragment_length_sd" - type: integer + type: double description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto. - name: "--bam_csi_index" type: boolean_true @@ -196,10 +184,6 @@ argument_groups: - name: "--salmon_quant_libtype" type: string description: Override Salmon library type inferred based on strandedness defined in meta object. - - name: "--extra_salmon_quant_args" - type: string - default: '-v' - description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline. - name: "--min_mapped_reads" type: integer description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing. @@ -223,10 +207,6 @@ argument_groups: - name: "--skip_pseudo_alignment" type: boolean_true description: Skip all of the pseudo-alignment-based processes within the pipeline. - - name: --extra_rsem_calculate_expression_args - type: string - description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline. - default: '--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1' - name: Process skipping options arguments: @@ -281,18 +261,10 @@ argument_groups: - name: Other process arguments arguments: - - name: "--extra_fq_subsample_args" - type: string - default: ' --record-count 1000000 --seed 1' - description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline. 
- name: "--extra_picard_args" type: string default: ' --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline. - - name: "--extra_bedtools_args" - type: string - default: ' -split -du' - description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline. - name: "--extra_preseq_args" type: string description: Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline @@ -367,14 +339,14 @@ argument_groups: required: false must_exist: false description: Path to output directory - default: fastq/$id.read_1.fastq.gz + default: fastq/${id}_r1.fastq.gz - name: "--output_fastq_2" type: file direction: output required: false must_exist: false description: Path to output directory - default: fastq/$id.read_2.fastq.gz + default: fastq/${id}_r2.fastq.gz # FastQC - name: "--fastqc_html_1" @@ -383,52 +355,52 @@ argument_groups: description: FastQC HTML report for read 1. required: false must_exist: false - default: fastqc_raw/$id.read_1.fastqc.html + default: fastqc_raw/${id}_r1.fastqc.html - name: "--fastqc_html_2" type: file direction: output description: FastQC HTML report for read 2. required: false must_exist: false - default: fastqc_raw/$id.read_2.fastqc.html + default: fastqc_raw/${id}_r2.fastqc.html - name: "--fastqc_zip_1" type: file direction: output description: FastQC report archive for read 1. required: false must_exist: false - default: fastqc_raw/$id.read_1.fastqc.zip + default: fastqc_raw/${id}_r1.fastqc.zip - name: "--fastqc_zip_2" type: file direction: output description: FastQC report archive for read 2. 
required: false must_exist: false - default: fastqc_raw/$id.read_2.fastqc.zip + default: fastqc_raw/${id}_r2.fastqc.zip - name: "--trim_html_1" type: file direction: output required: false must_exist: false - default: fastqc_trim/$id.read_1.trimmed_fastqc.html + default: fastqc_trim/${id}_r1.trimmed_fastqc.html - name: "--trim_html_2" type: file direction: output required: false must_exist: false - default: fastqc_trim/$id.read_2.trimmed_fastqc.html + default: fastqc_trim/${id}_r2.trimmed_fastqc.html - name: "--trim_zip_1" type: file direction: output required: false must_exist: false - default: fastqc_trim/$id.read_1.trimmed_fastqc.zip + default: fastqc_trim/${id}_r1.trimmed_fastqc.zip - name: "--trim_zip_2" type: file direction: output required: false must_exist: false - default: fastqc_trim/$id.read_2.trimmed_fastqc.zip + default: fastqc_trim/${id}_r2.trimmed_fastqc.zip # TrimGalore - name: "--trim_log_1" @@ -436,13 +408,13 @@ argument_groups: direction: output required: false must_exist: false - default: trimgalore/$id.read_1.trimming_report.txt + default: trimgalore/${id}_r1.trimming_report.txt - name: "--trim_log_2" type: file direction: output required: false must_exist: false - default: trimgalore/$id.read_2.trimming_report.txt + default: trimgalore/${id}_r2.trimming_report.txt # fastp - name: --fastp_trim_json @@ -842,17 +814,21 @@ argument_groups: default: dupradar/intercept_slope/$id.intercept_slope.txt # Qualimap - - name: "--qualimap_output_pdf" + - name: "--qualimap_qc_report" + direction: output + type: file + default: Qualimap/$id.rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--qualimap_counts" type: file direction: output - required: false - must_exist: false - default: qualimap/$id.qualimap_output.pdf - - name: "--qualimap_output_dir" + default: Qualimap/$id.counts.txt + description: Output file for computed counts. 
+ - name: "--qualimap_report" type: file direction: output - required: false - default: qualimap/$id + default: Qualimap/$id.report.html + description: Report output file. Supported formats are PDF or HTML. # DESeq2 - name: "--deseq2_output" diff --git a/src/workflows/rnaseq/main.nf b/src/workflows/rnaseq/main.nf index efe9a51..0c52e18 100644 --- a/src/workflows/rnaseq/main.nf +++ b/src/workflows/rnaseq/main.nf @@ -419,8 +419,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", @@ -534,8 +535,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "tpm_gene": "tpm_gene", "counts_gene": "counts_gene", "counts_gene_length_scaled": "counts_gene_length_scaled", diff --git a/src/workflows/rnaseq/test_run.sh b/src/workflows/rnaseq/test_run.sh index ed9ed02..ecca3a9 100755 --- a/src/workflows/rnaseq/test_run.sh +++ b/src/workflows/rnaseq/test_run.sh @@ -1,6 +1,6 @@ #!/bin/bash -# viash ns build --setup cb --parallel +viash ns build --setup cb --parallel cat > 
testData/minimal_test/input_fastq/sample_sheet.csv << HERE id,fastq_1,fastq_2,strandedness @@ -19,7 +19,7 @@ nextflow run target/nextflow/workflows/rnaseq/main.nf \ --gtf testData/minimal_test/reference/genes.gtf.gz \ --additional_fasta testData/minimal_test/reference/gfp.fa.gz \ --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \ - --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \ + --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \ --skip_pseudo_alignment \ -profile docker \ --resume diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml new file mode 100644 index 0000000..0a4f0f3 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/.config.vsh.yaml @@ -0,0 +1,395 @@ +name: "bbmap_bbsplit" +namespace: "bbmap" +version: "main" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--paired" + description: "Paired fastq files or not?" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input fastq files, either one or two (paired), separated by \";\"\ + ." + info: null + example: + - "reads.fastq" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ref" + description: "Reference FASTA files, separated by \";\". The primary reference\ + \ should be specified first." 
+ info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "boolean_true" + name: "--only_build_index" + description: "If set, only builds the index. Otherwise, mapping is performed." + info: null + direction: "input" + - type: "file" + name: "--build" + description: "Index to be used for mapping. \n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--qin" + description: "Set to 33 or 64 to specify input quality value ASCII offset. Automatically\ + \ detected if\nnot specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--interleaved" + description: "True forces paired/interleaved input; false forces single-ended\ + \ mapping.\nIf not specified, interleaved status will be autodetected from read\ + \ names.\n" + info: null + direction: "input" + - type: "integer" + name: "--maxindel" + description: "Don't look for indels longer than this. Lower is faster. Set to\ + \ >=100k for RNA-seq.\n" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--minratio" + description: "Fraction of max alignment score required to keep a site. Higher\ + \ is faster.\n" + info: null + example: + - 0.56 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--minhits" + description: "Minimum number of seed hits required for candidate sites. 
Higher\ + \ is faster.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--ambiguous" + description: "Set behavior on ambiguously-mapped reads (with multiple top-scoring\ + \ mapping locations).\n * best Use the first best site (Default)\n * toss\ + \ Consider unmapped\n * random Select one top-scoring site randomly\n \ + \ * all Retain all top-scoring sites. Does not work yet with SAM output\n" + info: null + example: + - "best" + required: false + choices: + - "best" + - "toss" + - "random" + - "all" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--ambiguous2" + description: "Set behavior only for reads that map ambiguously to multiple different\ + \ references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\n\ + Ambiguous2 excludes reads that map ambiguously within a single reference.\n\ + \ * best Use the first best site (Default)\n * toss Consider unmapped\n\ + \ * all Write a copy to the output for each reference to which it maps\n\ + \ * split Write a copy to the AMBIGUOUS_ output for each reference to which\ + \ it maps\n" + info: null + example: + - "best" + required: false + choices: + - "best" + - "toss" + - "all" + - "split" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--qtrim" + description: "Quality-trim ends to Q5 before mapping. Options are 'l' (left),\ + \ 'r' (right), and 'lr' (both).\n" + info: null + required: false + choices: + - "l" + - "r" + - "lr" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--untrim" + description: "Undo trimming after mapping. Untrimmed bases will be soft-clipped\ + \ in cigar strings." 
+ info: null + direction: "input" +- name: "Output" + arguments: + - type: "file" + name: "--index" + description: "Location to write the index.\n" + info: null + example: + - "BBSplit_index" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_1" + description: "Output file for read 1.\n" + info: null + example: + - "read_out1.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastq_2" + description: "Output file for read 2.\n" + info: null + example: + - "read_out2.fastq" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--sam2bam" + alternatives: + - "--bs" + description: "Write a shell script to 'file' that will turn the sam output into\ + \ a sorted, indexed bam file.\n" + info: null + example: + - "script.sh" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--scafstats" + description: "Write statistics on how many reads mapped to which scaffold to this\ + \ file.\n" + info: null + example: + - "scaffold_stats.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--refstats" + description: "Write statistics on how many reads were assigned to which reference\ + \ to this file.\nUnmapped reads whose mate mapped to a reference are considered\ + \ assigned and will be counted.\n" + info: null + example: + - "reference_stats.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--nzo" + description: "Only print lines with nonzero coverage." 
+ info: null + direction: "input" + - type: "string" + name: "--bbmap_args" + description: "Additional arguments from BBMap to pass to BBSplit.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Split sequencing reads by mapping them to multiple references simultaneously." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +license: "BBTools Copyright (c) 2014" +links: + repository: "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh" + homepage: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/" + documentation: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: 
"memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ + \ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\ + \ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n" + - type: "docker" + run: + - "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \"BBMAP:\", $NF}' >\ + \ /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bbmap/bbmap_bbsplit/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bbmap/bbmap_bbsplit" + executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf" + viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: 
"biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/umitools/umitools_extract/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf similarity index 89% rename from target/nextflow/umitools/umitools_extract/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf index 4acd900..dbf9c6e 100644 --- a/target/nextflow/umitools/umitools_extract/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf @@ -1,4 +1,4 @@ -// umitools_extract main +// bbmap_bbsplit main // // This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data @@ -2804,19 +2804,91 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "umitools_extract", - "namespace" : "umitools", + "name" : "bbmap_bbsplit", + "namespace" : "bbmap", "version" : "main", "argument_groups" : [ { "name" : "Input", "arguments" : [ { - "type" : "boolean", + "type" : "string", + "name" : "--id", + "description" : "Sample ID", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", "name" : "--paired", "description" : "Paired fastq files or not?", - "default" : [ - false + "direction" : "input" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq files, either one or two (paired), separated by \\";\\".", + "example" : [ + "reads.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref", + "description" : "Reference FASTA files, separated by \\";\\". The primary reference should be specified first.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--only_build_index", + "description" : "If set, only builds the index. Otherwise, mapping is performed.", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--build", + "description" : "Index to be used for mapping. \n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--qin", + "description" : "Set to 33 or 64 to specify input quality value ASCII offset. 
Automatically detected if\nnot specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--interleaved", + "description" : "True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--maxindel", + "description" : "Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq.\n", + "example" : [ + 20 ], "required" : false, "direction" : "input", @@ -2824,27 +2896,84 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "file", - "name" : "--input", - "description" : "Input fastq files, either one or two (paired)", + "type" : "double", + "name" : "--minratio", + "description" : "Fraction of max alignment score required to keep a site. Higher is faster.\n", "example" : [ - "sample.fastq" + 0.56 ], - "must_exist" : true, - "create_parent" : true, - "required" : true, + "required" : false, "direction" : "input", - "multiple" : true, - "multiple_sep" : "," + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--minhits", + "description" : "Minimum number of seed hits required for candidate sites. Higher is faster.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" }, { "type" : "string", - "name" : "--bc_pattern", - "description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI.", + "name" : "--ambiguous", + "description" : "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. 
Does not work yet with SAM output\n", + "example" : [ + "best" + ], "required" : false, + "choices" : [ + "best", + "toss", + "random", + "all" + ], "direction" : "input", - "multiple" : true, - "multiple_sep" : "," + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--ambiguous2", + "description" : "Set behavior only for reads that map ambiguously to multiple different references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n", + "example" : [ + "best" + ], + "required" : false, + "choices" : [ + "best", + "toss", + "all", + "split" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--qtrim", + "description" : "Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both).\n", + "required" : false, + "choices" : [ + "l", + "r", + "lr" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--untrim", + "description" : "Undo trimming after mapping. 
Untrimmed bases will be soft-clipped in cigar strings.", + "direction" : "input" } ] }, @@ -2853,14 +2982,28 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--fastq_1", - "description" : "Output file for read 1.", - "default" : [ - "$id.$key.read_1.fastq" + "name" : "--index", + "description" : "Location to write the index.\n", + "example" : [ + "BBSplit_index" ], "must_exist" : true, "create_parent" : true, - "required" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastq_1", + "description" : "Output file for read 1.\n", + "example" : [ + "read_out1.fastq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, "direction" : "output", "multiple" : false, "multiple_sep" : ";" @@ -2868,82 +3011,73 @@ meta = [ { "type" : "file", "name" : "--fastq_2", - "description" : "Output file for read 2.", - "default" : [ - "$id.$key.read_2.fastq" + "description" : "Output file for read 2.\n", + "example" : [ + "read_out2.fastq" ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", "multiple" : false, "multiple_sep" : ";" - } - ] - }, - { - "name" : "Optional arguments", - "arguments" : [ + }, { - "type" : "string", - "name" : "--umitools_extract_method", - "description" : "UMI pattern to use.", - "default" : [ - "string" + "type" : "file", + "name" : "--sam2bam", + "alternatives" : [ + "--bs" ], + "description" : "Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file.\n", + "example" : [ + "script.sh" + ], + "must_exist" : true, + "create_parent" : true, "required" : false, - "choices" : [ - "string", - "regex" - ], - "direction" : "input", + "direction" : "output", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "string", - "name" : "--umitools_umi_separator", - "description" : "The character that separates the UMI in the read name. 
Most likely a colon if you skipped the extraction with UMI-tools and used other software.", - "default" : [ - "_" + "type" : "file", + "name" : "--scafstats", + "description" : "Write statistics on how many reads mapped to which scaffold to this file.\n", + "example" : [ + "scaffold_stats.txt" ], + "must_exist" : true, + "create_parent" : true, "required" : false, - "direction" : "input", + "direction" : "output", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "string", - "name" : "--umitools_grouping_method", - "description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", - "default" : [ - "directional" + "type" : "file", + "name" : "--refstats", + "description" : "Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n", + "example" : [ + "reference_stats.txt" ], + "must_exist" : true, + "create_parent" : true, "required" : false, - "choices" : [ - "unique", - "percentile", - "cluster", - "adjacency", - "directional" - ], - "direction" : "input", + "direction" : "output", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "integer", - "name" : "--umi_discard_read", - "description" : "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", - "default" : [ - 0 - ], + "type" : "boolean_true", + "name" : "--nzo", + "description" : "Only print lines with nonzero coverage.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--bbmap_args", + "description" : "Additional arguments from BBMap to pass to BBSplit.\n", "required" : false, - "choices" : [ - 0, - 1, - 2 - ], "direction" : "input", "multiple" : false, "multiple_sep" : ";" @@ -2958,56 +3092,26 @@ meta = [ "is_executable" : true } ], - "description" : 
"UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.\nThis component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place\n", + "description" : "Split sequencing reads by mapping them to multiple references simultaneously.", "test_resources" : [ { "type" : "bash_script", "path" : "test.sh", "is_executable" : true - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/scrb_seq_fastq.1.gz" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/scrb_seq_fastq.2.gz" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/slim.fastq.gz" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/umitools/extract/main.nf", - "modules/nf-core/umitools/extract/meta.yml" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "license" : "BBTools Copyright (c) 2014", + "links" : { + "repository" : "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh", + "homepage" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/", + "documentation" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/" + }, "runners" : [ { "type" : "executable", @@ -3092,19 +3196,16 @@ meta = [ "namespace_separator" : "/", "setup" : [ { - "type" : "apt", - "packages" : [ - "pip" - ], - "interactive" : false + 
"type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget tar && \\\\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \\\\\ntar xzf BBMap_39.01.tar.gz && \\\\\ncp -r bbmap/* /usr/local/bin\n" + ] }, { - "type" : "python", - "user" : false, - "packages" : [ - "umi_tools" - ], - "upgrade" : true + "type" : "docker", + "run" : [ + "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \\"BBMAP:\\", $NF}' > /var/software_versions.txt\n" + ] } ] }, @@ -3114,49 +3215,39 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/umitools/umitools_extract/config.vsh.yaml", + "config" : "/workdir/root/repo/src/bbmap/bbmap_bbsplit/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/umitools/umitools_extract", + "output" : "target/nextflow/bbmap/bbmap_bbsplit", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : 
"target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3170,17 +3261,33 @@ def innerWorkflowFactory(args) { def rawScript = '''set -e tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN +#!/bin/bash + ## VIASH START # The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) $( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_BC_PATTERN+x} ]; then echo "${VIASH_PAR_BC_PATTERN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bc_pattern='&'#" ; else echo "# par_bc_pattern="; fi ) +$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi ) +$( if [ ! -z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi ) +$( if [ ! -z ${VIASH_PAR_BUILD+x} ]; then echo "${VIASH_PAR_BUILD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_build='&'#" ; else echo "# par_build="; fi ) +$( if [ ! 
-z ${VIASH_PAR_QIN+x} ]; then echo "${VIASH_PAR_QIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qin='&'#" ; else echo "# par_qin="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERLEAVED+x} ]; then echo "${VIASH_PAR_INTERLEAVED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved='&'#" ; else echo "# par_interleaved="; fi ) +$( if [ ! -z ${VIASH_PAR_MAXINDEL+x} ]; then echo "${VIASH_PAR_MAXINDEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maxindel='&'#" ; else echo "# par_maxindel="; fi ) +$( if [ ! -z ${VIASH_PAR_MINRATIO+x} ]; then echo "${VIASH_PAR_MINRATIO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minratio='&'#" ; else echo "# par_minratio="; fi ) +$( if [ ! -z ${VIASH_PAR_MINHITS+x} ]; then echo "${VIASH_PAR_MINHITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minhits='&'#" ; else echo "# par_minhits="; fi ) +$( if [ ! -z ${VIASH_PAR_AMBIGUOUS+x} ]; then echo "${VIASH_PAR_AMBIGUOUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous='&'#" ; else echo "# par_ambiguous="; fi ) +$( if [ ! -z ${VIASH_PAR_AMBIGUOUS2+x} ]; then echo "${VIASH_PAR_AMBIGUOUS2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous2='&'#" ; else echo "# par_ambiguous2="; fi ) +$( if [ ! -z ${VIASH_PAR_QTRIM+x} ]; then echo "${VIASH_PAR_QTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qtrim='&'#" ; else echo "# par_qtrim="; fi ) +$( if [ ! -z ${VIASH_PAR_UNTRIM+x} ]; then echo "${VIASH_PAR_UNTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_untrim='&'#" ; else echo "# par_untrim="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) $( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) $( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) -$( if [ ! 
-z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_EXTRACT_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_extract_method='&'#" ; else echo "# par_umitools_extract_method="; fi ) -$( if [ ! -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMITOOLS_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_umi_separator='&'#" ; else echo "# par_umitools_umi_separator="; fi ) -$( if [ ! -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_GROUPING_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_grouping_method='&'#" ; else echo "# par_umitools_grouping_method="; fi ) -$( if [ ! -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then echo "${VIASH_PAR_UMI_DISCARD_READ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_discard_read='&'#" ; else echo "# par_umi_discard_read="; fi ) +$( if [ ! -z ${VIASH_PAR_SAM2BAM+x} ]; then echo "${VIASH_PAR_SAM2BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam2bam='&'#" ; else echo "# par_sam2bam="; fi ) +$( if [ ! -z ${VIASH_PAR_SCAFSTATS+x} ]; then echo "${VIASH_PAR_SCAFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scafstats='&'#" ; else echo "# par_scafstats="; fi ) +$( if [ ! -z ${VIASH_PAR_REFSTATS+x} ]; then echo "${VIASH_PAR_REFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refstats='&'#" ; else echo "# par_refstats="; fi ) +$( if [ ! -z ${VIASH_PAR_NZO+x} ]; then echo "${VIASH_PAR_NZO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nzo='&'#" ; else echo "# par_nzo="; fi ) +$( if [ ! -z ${VIASH_PAR_BBMAP_ARGS+x} ]; then echo "${VIASH_PAR_BBMAP_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbmap_args='&'#" ; else echo "# par_bbmap_args="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! 
-z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3201,7 +3308,6 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) ## VIASH END -#!/bin/bash set -eo pipefail @@ -3210,56 +3316,85 @@ function clean_up { } trap clean_up EXIT -tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") +unset_if_false=( par_paired par_only_build_index par_interleaved par_untrim par_nzo) -IFS="," read -ra input <<< "\\$par_input" -IFS="," read -ra pattern <<< "\\$par_bc_pattern" +for var in "\\${unset_if_false[@]}"; do + if [ -z "\\${!var}" ]; then + unset \\$var + fi +done -read_count="\\${#input[@]}" -pattern_count="\\${#pattern[@]}" +if [ ! -d "\\$par_build" ]; then + IFS=";" read -ra ref_files <<< "\\$par_ref" + primary_ref="\\${ref_files[0]}" + refs=() + for file in "\\${ref_files[@]:1}" + do + name=\\$(basename "\\$file" | sed 's/\\\\.[^.]*\\$//') + refs+=("ref_\\$name=\\$file") + done +fi -if [ "\\$par_paired" == "true" ]; then - echo "Paired - Reads: \\$read_count bc_patterns: \\$pattern_count" - if [ "\\$read_count" -ne 2 ] || [ "\\$pattern_count" -ne 2 ]; then - echo "Paired end input requires two read files and two UMI patterns" - exit 1 +if \\$par_only_build_index; then + if [ "\\${#refs[@]}" -gt 1 ]; then + bbsplit.sh \\\\ + --ref_primary="\\$primary_ref" \\\\ + "\\${refs[@]}" \\\\ + path=\\$par_index else - read1="\\$(basename -- \\${input[0]})" - read2="\\$(basename -- \\${input[1]})" - umi_tools extract \\\\ - -I "\\${input[0]}" --read2-in="\\${input[1]}" \\\\ - -S "\\$tmpdir/\\$read1" \\\\ - --read2-out="\\$tmpdir/\\$read2" \\\\ - --extract-method \\$par_umitools_extract_method \\\\ - --bc-pattern "\\${pattern[0]}" \\\\ - 
--bc-pattern2 "\\${pattern[1]}" \\\\ - --umi-separator \\$par_umitools_umi_separator - if [ \\$par_umi_discard_read == 1 ]; then - # discard read 1 - cp \\$tmpdir/\\$read1 \\$par_fastq_1 - elif [ \\$par_umi_discard_read == 2 ]; then - # discard read 2 - cp \\$tmpdir/\\$read2 \\$par_fastq_1 - else - cp \\$tmpdir/\\$read1 \\$par_fastq_1 - cp \\$tmpdir/\\$read2 \\$par_fastq_2 - fi + echo "ERROR: Please specify at least two reference fasta files." fi else - echo "Not Paired - \\$read_count" - if [ "\\$read_count" -ne 1 ] || [ "\\$pattern_count" -ne 1 ]; then - echo "Single end input requires one read file and one UMI pattern" - exit 1 + IFS=";" read -ra input <<< "\\$par_input" + tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX") + index_files='' + if [ -d "\\$par_build" ]; then + index_files="path=\\$par_build" + elif [ \\${#refs[@]} -gt 0 ]; then + index_files="--ref_primary=\\$primary_ref \\${refs[*]}" else - read1="\\$(basename -- \\${input[0]})" - umi_tools extract \\\\ - -I "\\${input[0]}" -S "\\$tmpdir/\\$read1" \\\\ - --extract-method \\$par_umitools_extract_method \\\\ - --bc-pattern "\\${pattern[0]}" \\\\ - --umi-separator \\$par_umitools_umi_separator - cp \\$tmpdir/\\$read1 \\$par_fastq_1 + echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files." 
+ fi + + extra_args="" + if [ -f "\\$par_refstats" ]; then extra_args+=" --refstats \\$par_refstats"; fi + if [ -n "\\$par_ambiguous" ]; then extra_args+=" --ambiguous \\$par_ambiguous"; fi + if [ -n "\\$par_ambiguous2" ]; then extra_args+=" --ambiguous2 \\$par_ambiguous2"; fi + if [ -n "\\$par_minratio" ]; then extra_args+=" --minratio \\$par_minratio"; fi + if [ -n "\\$par_minhits" ]; then extra_args+=" --minhits \\$par_minhits"; fi + if [ -n "\\$par_maxindel" ]; then extra_args+=" --maxindel \\$par_maxindel"; fi + if [ -n "\\$par_qin" ]; then extra_args+=" --qin \\$par_qin"; fi + if [ -n "\\$par_qtrim" ]; then extra_args+=" --qtrim \\$par_qtrim"; fi + if [ "\\$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi + if [ "\\$par_untrim" = true ]; then extra_args+=" --untrim"; fi + if [ "\\$par_nzo" = true ]; then extra_args+=" --nzo"; fi + + if [ -n "\\$par_bbmap_args" ]; then extra_args+=" \\$par_bbmap_args"; fi + + + if \\$par_paired; then + bbsplit.sh \\\\ + \\$index_files \\\\ + in=\\${input[0]} \\\\ + in2=\\${input[1]} \\\\ + basename=\\${tmpdir}/%_#.fastq \\\\ + \\$extra_args + read1=\\$(find \\$tmpdir/ -iname primary_1*) + read2=\\$(find \\$tmpdir/ -iname primary_2*) + cp \\$read1 \\$par_fastq_1 + cp \\$read2 \\$par_fastq_2 + else + bbsplit.sh \\\\ + \\$index_files \\\\ + in=\\${input[0]} \\\\ + basename=\\${tmpdir}/%.fastq \\\\ + \\$extra_args + read1=\\$(find \\$tmpdir/ -iname primary*) + cp \\$read1 \\$par_fastq_1 fi fi + +exit 0 VIASHMAIN bash "$tempscript" ''' @@ -3620,7 +3755,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/umitools/umitools_extract", + "image" : "vsh/biobox/bbmap/bbmap_bbsplit", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/bbmap_bbsplit/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow.config similarity index 98% rename from target/nextflow/bbmap_bbsplit/nextflow.config rename 
to target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow.config index 1be7997..4603d2e 100644 --- a/target/nextflow/bbmap_bbsplit/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow.config @@ -1,9 +1,9 @@ manifest { - name = 'bbmap_bbsplit' + name = 'bbmap/bbmap_bbsplit' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Split sequencing reads by mapping them to multiple references simultaneously.\n' + description = 'Split sequencing reads by mapping them to multiple references simultaneously.' } process.container = 'nextflow/bash:latest' diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json new file mode 100644 index 0000000..1321e05 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/nextflow_schema.json @@ -0,0 +1,321 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bbmap_bbsplit", +"description": "Split sequencing reads by mapping them to multiple references simultaneously.", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`. Sample ID", + "help_text": "Type: `string`. Sample ID" + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Paired fastq files or not?", + "help_text": "Type: `boolean_true`, default: `false`. Paired fastq files or not?" + , + "default": "False" + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. 
Input fastq files, either one or two (paired), separated by \";\"", + "help_text": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"." + + } + + + , + "ref": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\"", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\". The primary reference should be specified first." + + } + + + , + "only_build_index": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If set, only builds the index", + "help_text": "Type: `boolean_true`, default: `false`. If set, only builds the index. Otherwise, mapping is performed." + , + "default": "False" + } + + + , + "build": { + "type": + "string", + "description": "Type: `file`. Index to be used for mapping", + "help_text": "Type: `file`. Index to be used for mapping. \n" + + } + + + , + "qin": { + "type": + "string", + "description": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset", + "help_text": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n" + + } + + + , + "interleaved": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping", + "help_text": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n" + , + "default": "False" + } + + + , + "maxindel": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this", + "help_text": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this. Lower is faster. 
Set to \u003e=100k for RNA-seq.\n" + + } + + + , + "minratio": { + "type": + "number", + "description": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site", + "help_text": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site. Higher is faster.\n" + + } + + + , + "minhits": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites", + "help_text": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites. Higher is faster.\n" + + } + + + , + "ambiguous": { + "type": + "string", + "description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)", + "help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n", + "enum": ["best", "toss", "random", "all"] + + + } + + + , + "ambiguous2": { + "type": + "string", + "description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references", + "help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. 
Set behavior only for reads that map ambiguously to multiple different references.\nNormal \u0027ambiguous=\u0027 controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n", + "enum": ["best", "toss", "all", "split"] + + + } + + + , + "qtrim": { + "type": + "string", + "description": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping", + "help_text": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping. Options are \u0027l\u0027 (left), \u0027r\u0027 (right), and \u0027lr\u0027 (both).\n", + "enum": ["l", "r", "lr"] + + + } + + + , + "untrim": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Undo trimming after mapping", + "help_text": "Type: `boolean_true`, default: `false`. Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings." + , + "default": "False" + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index", + "help_text": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index.\n" + , + "default": "$id.$key.index.index" + } + + + , + "fastq_1": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1", + "help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. 
Output file for read 1.\n" + , + "default": "$id.$key.fastq_1.fastq" + } + + + , + "fastq_2": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2.\n" + , + "default": "$id.$key.fastq_2.fastq" + } + + + , + "sam2bam": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file", + "help_text": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file.\n" + , + "default": "$id.$key.sam2bam.sh" + } + + + , + "scafstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file", + "help_text": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file.\n" + , + "default": "$id.$key.scafstats.txt" + } + + + , + "refstats": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file", + "help_text": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n" + , + "default": "$id.$key.refstats.txt" + } + + + , + "nzo": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Only print lines with nonzero coverage", + "help_text": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage." + , + "default": "False" + } + + + , + "bbmap_args": { + "type": + "string", + "description": "Type: `string`. Additional arguments from BBMap to pass to BBSplit", + "help_text": "Type: `string`. Additional arguments from BBMap to pass to BBSplit.\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml new file mode 100644 index 0000000..8b9438c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/.config.vsh.yaml @@ -0,0 +1,364 @@ +name: "bedtools_genomecov" +namespace: "bedtools" +version: "main" +authors: +- name: "Theodoro Gasperin Terra Camargo" + roles: + - "author" + - "maintainer" + info: + links: + email: "theodorogtc@gmail.com" + github: "tgaspe" + linkedin: "theodoro-gasperin-terra-camargo" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "The input file (BED/GFF/VCF) to be used.\n" + info: null + example: + - "input.bed" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_bam" + 
alternatives: + - "-ibam" + description: "The input file is in BAM format.\nNote: BAM _must_ be sorted by\ + \ positions.\n'--genome' option is ignored if you use '--input_bam' option!\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--genome" + alternatives: + - "-g" + description: "The genome file to be used.\n" + info: null + example: + - "genome.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "The output BED file. \n" + info: null + example: + - "output.bed" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--depth" + alternatives: + - "-d" + description: "Report the depth at each genome position (with one-based coordinates).\n\ + Default behavior is to report a histogram.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--depth_zero" + alternatives: + - "-dz" + description: "Report the depth at each genome position (with zero-based coordinates).\n\ + Reports only non-zero positions.\nDefault behavior is to report a histogram.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--bed_graph" + alternatives: + - "-bg" + description: "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--bed_graph_zero_coverage" + alternatives: + - "-bga" + description: "Report depth in BedGraph format, as above (-bg).\nHowever with this\ + \ option, regions with zero \ncoverage are also reported. 
This allows one to\n\ + quickly extract all regions of a genome with 0 \ncoverage by applying: \"grep\ + \ -w 0$\" to the output.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--split" + description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals.\n\ + when computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\"\ + \ operations \nto infer the blocks for computing coverage.\nFor BED12 files,\ + \ this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns\ + \ 10,11,12).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--ignore_deletion" + alternatives: + - "-ignoreD" + description: "Ignore local deletions (CIGAR \"D\" operations) in BAM entries\n\ + when computing coverage.\n" + info: null + direction: "input" + - type: "string" + name: "--strand" + description: "Calculate coverage of intervals from a specific strand.\nWith BED\ + \ files, requires at least 6 columns (strand is column 6). \n" + info: null + required: false + choices: + - "+" + - "-" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--pair_end_coverage" + alternatives: + - "-pc" + description: "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--fragment_size" + alternatives: + - "-fs" + description: "Force to use provided fragment size instead of read length\nWorks\ + \ for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--du" + description: "Change strand af the mate read (so both reads from the same strand)\ + \ useful for strand specific\nWorks for BAM files only\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--five_prime" + alternatives: + - "-5" + description: "Calculate coverage of 5\" positions (instead of entire interval).\n" + info: null + direction: "input" + - type: "boolean_true" + name: 
"--three_prime" + alternatives: + - "-3" + description: "Calculate coverage of 3\" positions (instead of entire interval).\n" + info: null + direction: "input" + - type: "integer" + name: "--max" + description: "Combine all positions with a depth >= max into\na single bin in\ + \ the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n" + info: null + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--scale" + description: "Scale the coverage by a constant factor.\nEach coverage value is\ + \ multiplied by this factor before being reported.\nUseful for normalizing coverage\ + \ by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n" + info: null + required: false + min: 0.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--trackline" + description: "Adds a UCSC/Genome-Browser track line definition in the first line\ + \ of the output.\n- See here for more details about track line definition:\n\ + \ http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding\ + \ a trackline definition, the output BedGraph can be easily\n uploaded\ + \ to the Genome Browser as a custom track,\n BUT CAN NOT be converted into\ + \ a BigWig file (w/o removing the first line).\n" + info: null + direction: "input" + - type: "string" + name: "--trackopts" + description: "Writes additional track line definition parameters in the first\ + \ line.\n- Example:\n -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'\n\ + \ Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Compute the coverage of a feature file among a genome.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + 
path: "test_data" +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "genome coverage" +- "BED" +- "GFF" +- "VCF" +- "BAM" +license: "MIT" +references: + doi: + - "10.1093/bioinformatics/btq033" +links: + repository: "https://github.com/arq5x/bedtools2" + homepage: "https://bedtools.readthedocs.io/en/latest/#" + documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html" + issue_tracker: "https://github.com/arq5x/bedtools2/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 
17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "debian:stable-slim" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "bedtools" + - "procps" + interactive: false + - type: "docker" + run: + - "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\ + \ > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/bedtools/bedtools_genomecov/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/bedtools/bedtools_genomecov" + executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf" + viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: "biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" 
+ issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/rsem/rsem_calculate_expression/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf similarity index 90% rename from target/nextflow/rsem/rsem_calculate_expression/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf index f79cc2e..29f1f47 100644 --- a/target/nextflow/rsem/rsem_calculate_expression/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf @@ -1,4 +1,4 @@ -// rsem_calculate_expression main +// bedtools_genomecov main // // This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. 
+// +// Component authors: +// * Theodoro Gasperin Terra Camargo (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2804,60 +2807,46 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "rsem_calculate_expression", - "namespace" : "rsem", + "name" : "bedtools_genomecov", + "namespace" : "bedtools", "version" : "main", + "authors" : [ + { + "name" : "Theodoro Gasperin Terra Camargo", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "theodorogtc@gmail.com", + "github" : "tgaspe", + "linkedin" : "theodoro-gasperin-terra-camargo" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], "argument_groups" : [ { - "name" : "Input", + "name" : "Inputs", "arguments" : [ - { - "type" : "string", - "name" : "--id", - "description" : "Sample ID.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "string", - "name" : "--strandedness", - "description" : "Sample strand-specificity. 
Must be one of unstranded, forward, reverse", - "required" : false, - "choices" : [ - "forward", - "reverse", - "unstranded" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "boolean", - "name" : "--paired", - "description" : "Paired-end reads or not?", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "file", "name" : "--input", - "description" : "Input reads for quantification.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--index", - "description" : "RSEM index.", + "alternatives" : [ + "-i" + ], + "description" : "The input file (BED/GFF/VCF) to be used.\n", + "example" : [ + "input.bed" + ], "must_exist" : true, "create_parent" : true, "required" : false, @@ -2866,9 +2855,31 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "string", - "name" : "--extra_args", - "description" : "Extra rsem-calculate-expression arguments in addition to the defaults.", + "type" : "file", + "name" : "--input_bam", + "alternatives" : [ + "-ibam" + ], + "description" : "The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n'--genome' option is ignored if you use '--input_bam' option!\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--genome", + "alternatives" : [ + "-g" + ], + "description" : "The genome file to be used.\n", + "example" : [ + "genome.txt" + ], + "must_exist" : true, + "create_parent" : true, "required" : false, "direction" : "input", "multiple" : false, @@ -2877,104 +2888,166 @@ meta = [ ] }, { - "name" : "Output", + "name" : "Outputs", "arguments" : [ { "type" : "file", - "name" : "--counts_gene", - "description" : "Expression counts on gene level", + "name" : "--output", + 
"description" : "The output BED file. \n", "example" : [ - "sample.genes.results" + "output.bed" ], "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--depth", + "alternatives" : [ + "-d" + ], + "description" : "Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--depth_zero", + "alternatives" : [ + "-dz" + ], + "description" : "Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bed_graph", + "alternatives" : [ + "-bg" + ], + "description" : "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--bed_graph_zero_coverage", + "alternatives" : [ + "-bga" + ], + "description" : "Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. 
This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \\"grep -w 0$\\" to the output.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--split", + "description" : "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--ignore_deletion", + "alternatives" : [ + "-ignoreD" + ], + "description" : "Ignore local deletions (CIGAR \\"D\\" operations) in BAM entries\nwhen computing coverage.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--strand", + "description" : "Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). 
\n", + "required" : false, + "choices" : [ + "+", + "-" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" }, { - "type" : "file", - "name" : "--counts_transcripts", - "description" : "Expression counts on transcript level", - "example" : [ - "sample.isoforms.results" + "type" : "boolean_true", + "name" : "--pair_end_coverage", + "alternatives" : [ + "-pc" ], - "must_exist" : true, - "create_parent" : true, + "description" : "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fragment_size", + "alternatives" : [ + "-fs" + ], + "description" : "Force to use provided fragment size instead of read length\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--du", + "description" : "Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--five_prime", + "alternatives" : [ + "-5" + ], + "description" : "Calculate coverage of 5\\" positions (instead of entire interval).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--three_prime", + "alternatives" : [ + "-3" + ], + "description" : "Calculate coverage of 3\\" positions (instead of entire interval).\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--max", + "description" : "Combine all positions with a depth >= max into\na single bin in the histogram. 
Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n", "required" : false, - "direction" : "output", + "min" : 0, + "direction" : "input", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "file", - "name" : "--stat", - "description" : "RSEM statistics", - "example" : [ - "sample.stat" - ], - "must_exist" : true, - "create_parent" : true, + "type" : "double", + "name" : "--scale", + "description" : "Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n", "required" : false, - "direction" : "output", + "min" : 0.0, + "direction" : "input", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "file", - "name" : "--logs", - "description" : "RSEM logs", - "example" : [ - "sample.log" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" + "type" : "boolean_true", + "name" : "--trackline", + "description" : "Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n", + "direction" : "input" }, { - "type" : "file", - "name" : "--bam_star", - "description" : "BAM file generated by STAR (optional)", - "example" : [ - "sample.STAR.genome.bam" - ], - "must_exist" : true, - "create_parent" : true, + "type" : "string", + "name" : "--trackopts", + "description" : "Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts 'name=\\"My Track\\" visibility=2 color=255,30,30'\n Note the use of single-quotes if you have spaces in your 
parameters.\n- (TEXT)\n", "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--bam_genome", - "description" : "Genome BAM file (optional)", - "example" : [ - "sample.genome.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--bam_transcript", - "description" : "Transcript BAM file (optional)", - "example" : [ - "sample.transcript.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, + "direction" : "input", + "multiple" : true, "multiple_sep" : ";" } ] @@ -2987,7 +3060,7 @@ meta = [ "is_executable" : true } ], - "description" : "Calculate expression with RSEM.\n", + "description" : "Compute the coverage of a feature file among a genome.\n", "test_resources" : [ { "type" : "bash_script", @@ -2996,47 +3069,34 @@ meta = [ }, { "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/reference/rsem.tar.gz" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/rsem/calculateexpression/main.nf", - "modules/nf-core/rsem/calculateexpression/meta.yml" - ], - "last_sha" : "92b2a7857de1dda9d1c19a088941fc81e2976ff7" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "genome coverage", + "BED", + "GFF", + "VCF", + "BAM" ], + "license" : "MIT", + "references" : { 
+ "doi" : [ + "10.1093/bioinformatics/btq033" + ] + }, + "links" : { + "repository" : "https://github.com/arq5x/bedtools2", + "homepage" : "https://bedtools.readthedocs.io/en/latest/#", + "documentation" : "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html", + "issue_tracker" : "https://github.com/arq5x/bedtools2/issues" + }, "runners" : [ { "type" : "executable", @@ -3115,7 +3175,7 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "debian:stable-slim", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", @@ -3123,31 +3183,15 @@ meta = [ { "type" : "apt", "packages" : [ - "build-essential", - "gcc", - "g++", - "make", - "wget", - "zlib1g-dev", - "unzip", - "xxd", - "perl", - "r-base", - "bowtie2", - "python3-pip", - "git" + "bedtools", + "procps" ], "interactive" : false }, { "type" : "docker", "run" : [ - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\nunzip ${STAR_VERSION}.zip && \\\\\ncd STAR-${STAR_VERSION}/source && \\\\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\ncp STAR /usr/local/bin && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \\\\\nunzip v${RSEM_VERSION}.zip && \\\\\ncd RSEM-${RSEM_VERSION} && \\\\\nmake && \\\\\nmake install && \\\\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\nrm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \\\\\ncd && \\\\\napt-get clean && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \\\\\n/bin/bash -c \\"source /etc/profile && source ~/.bashrc && echo $PATH && which STAR\\"\n" - ], - "env" : [ - "STAR_VERSION=2.7.11b", - "RSEM_VERSION=1.3.3", - "TZ=Europe/Brussels" + 
"echo \\"bedtools: \\\\\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\\\\"\\" > /var/software_versions.txt\n" ] } ] @@ -3158,49 +3202,39 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/rsem/rsem_calculate_expression/config.vsh.yaml", + "config" : "/workdir/root/repo/src/bedtools/bedtools_genomecov/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/rsem/rsem_calculate_expression", + "output" : "target/nextflow/bedtools/bedtools_genomecov", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", 
+ "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3214,21 +3248,30 @@ def innerWorkflowFactory(args) { def rawScript = '''set -e tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN +#!/bin/bash + ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) -$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) -$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) -$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) -$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) -$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi ) -$( if [ ! 
-z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_BAM+x} ]; then echo "${VIASH_PAR_INPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_bam='&'#" ; else echo "# par_input_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_DEPTH+x} ]; then echo "${VIASH_PAR_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth='&'#" ; else echo "# par_depth="; fi ) +$( if [ ! -z ${VIASH_PAR_DEPTH_ZERO+x} ]; then echo "${VIASH_PAR_DEPTH_ZERO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth_zero='&'#" ; else echo "# par_depth_zero="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_GRAPH+x} ]; then echo "${VIASH_PAR_BED_GRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph='&'#" ; else echo "# par_bed_graph="; fi ) +$( if [ ! -z ${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE+x} ]; then echo "${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph_zero_coverage='&'#" ; else echo "# par_bed_graph_zero_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi ) +$( if [ ! 
-z ${VIASH_PAR_IGNORE_DELETION+x} ]; then echo "${VIASH_PAR_IGNORE_DELETION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_deletion='&'#" ; else echo "# par_ignore_deletion="; fi ) +$( if [ ! -z ${VIASH_PAR_STRAND+x} ]; then echo "${VIASH_PAR_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strand='&'#" ; else echo "# par_strand="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIR_END_COVERAGE+x} ]; then echo "${VIASH_PAR_PAIR_END_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_end_coverage='&'#" ; else echo "# par_pair_end_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_SIZE+x} ]; then echo "${VIASH_PAR_FRAGMENT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_size='&'#" ; else echo "# par_fragment_size="; fi ) +$( if [ ! -z ${VIASH_PAR_DU+x} ]; then echo "${VIASH_PAR_DU}" | sed "s#'#'\\"'\\"'#g;s#.*#par_du='&'#" ; else echo "# par_du="; fi ) +$( if [ ! -z ${VIASH_PAR_FIVE_PRIME+x} ]; then echo "${VIASH_PAR_FIVE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_five_prime='&'#" ; else echo "# par_five_prime="; fi ) +$( if [ ! -z ${VIASH_PAR_THREE_PRIME+x} ]; then echo "${VIASH_PAR_THREE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime='&'#" ; else echo "# par_three_prime="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX+x} ]; then echo "${VIASH_PAR_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max='&'#" ; else echo "# par_max="; fi ) +$( if [ ! -z ${VIASH_PAR_SCALE+x} ]; then echo "${VIASH_PAR_SCALE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scale='&'#" ; else echo "# par_scale="; fi ) +$( if [ ! -z ${VIASH_PAR_TRACKLINE+x} ]; then echo "${VIASH_PAR_TRACKLINE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackline='&'#" ; else echo "# par_trackline="; fi ) +$( if [ ! -z ${VIASH_PAR_TRACKOPTS+x} ]; then echo "${VIASH_PAR_TRACKOPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackopts='&'#" ; else echo "# par_trackopts="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3249,47 +3292,57 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) ## VIASH END -#!/bin/bash +# Exit on error set -eo pipefail -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT +# Unset variables +unset_if_false=( + par_input_bam + par_depth + par_depth_zero + par_bed_graph + par_bed_graph_zero_coverage + par_split + par_ignore_deletion + par_pair_end_coverage + par_fragment_size + par_du + par_five_prime + par_three_prime + par_trackline +) -tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done -[[ "\\$par_paired" == "false" ]] && unset par_paired +# Create input array +IFS=";" read -ra trackopts <<< \\$par_trackopts -if [ \\$par_strandedness == 'forward' ]; then - strandedness='--strandedness forward' -elif [ \\$par_strandedness == 'reverse' ]; then - strandedness='--strandedness reverse' -else - strandedness='' -fi - -IFS=";" read -ra input <<< \\$par_input - -INDEX=\\`find -L \\$par_index/ -name "*.grp" | sed 's/\\\\.grp\\$//'\\` - -rsem-calculate-expression \\\\ - \\${meta_cpus:+--num-threads \\$meta_cpus} \\\\ - \\$strandedness \\\\ - \\${par_paired:+--paired-end} \\\\ - \\$par_extra_args \\\\ - \\${input[*]} \\\\ - \\$INDEX \\\\ - \\$par_id +bedtools genomecov \\\\ + \\${par_depth:+-d} \\\\ + \\${par_depth_zero:+-dz} \\\\ + 
\\${par_bed_graph:+-bg} \\\\ + \\${par_bed_graph_zero_coverage:+-bga} \\\\ + \\${par_split:+-split} \\\\ + \\${par_ignore_deletion:+-ignoreD} \\\\ + \\${par_du:+-du} \\\\ + \\${par_five_prime:+-5} \\\\ + \\${par_three_prime:+-3} \\\\ + \\${par_trackline:+-trackline} \\\\ + \\${par_strand:+-strand "\\$par_strand"} \\\\ + \\${par_max:+-max "\\$par_max"} \\\\ + \\${par_scale:+-scale "\\$par_scale"} \\\\ + \\${par_trackopts:+-trackopts "\\${trackopts[*]}"} \\\\ + \\${par_input_bam:+-ibam "\\$par_input_bam"} \\\\ + \\${par_input:+-i "\\$par_input"} \\\\ + \\${par_genome:+-g "\\$par_genome"} \\\\ + \\${par_pair_end_coverage:+-pc} \\\\ + \\${par_fragment_size:+-fs} \\\\ + > "\\$par_output" -[[ -e "\\${par_id}.genes.results" ]] && mv "\\${par_id}.genes.results" \\$par_counts_gene -[[ -e "\\${par_id}id.isoforms.results" ]] && mv "\\${par_id}id.isoforms.results" \\$par_counts_transcripts -[[ -e "\\${par_id}.stat" ]] && mv "\\${par_id}.stat" \\$par_stat -# [[ -e "\\${par_id}.log" ]] && mv "\\${par_id}.log" \\$par_logs -[[ -e "\\${par_id}.STAR.genome.bam" ]] && mv "\\${par_id}.STAR.genome.bam" \\$par_bam_star -[[ -e "\\${par_id}.genome.bam" ]] && mv "\\${par_id}.genome.bam" \\$par_bam_genome -[[ -e "\\${par_id}.transcript.bam" ]] && mv "\\${par_id}.transcript.bam" \\$par_bam_transcript VIASHMAIN bash "$tempscript" ''' @@ -3650,7 +3703,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/rsem/rsem_calculate_expression", + "image" : "vsh/biobox/bedtools/bedtools_genomecov", "tag" : "main" }, "tag" : "$id" diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow.config new file mode 100644 index 0000000..784641e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 
'bedtools/bedtools_genomecov' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' + description = 'Compute the coverage of a feature file among a genome.\n' + author = 'Theodoro Gasperin Terra Camargo' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { 
memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json new 
file mode 100644 index 0000000..3e0bb4b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/nextflow_schema.json @@ -0,0 +1,303 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "bedtools_genomecov", +"description": "Compute the coverage of a feature file among a genome.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used", + "help_text": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used.\n" + + } + + + , + "input_bam": { + "type": + "string", + "description": "Type: `file`. The input file is in BAM format", + "help_text": "Type: `file`. The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n\u0027--genome\u0027 option is ignored if you use \u0027--input_bam\u0027 option!\n" + + } + + + , + "genome": { + "type": + "string", + "description": "Type: `file`, example: `genome.txt`. The genome file to be used", + "help_text": "Type: `file`, example: `genome.txt`. The genome file to be used.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file", + "help_text": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file. \n" + , + "default": "$id.$key.output.bed" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "depth": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Report the depth at each genome position (with one-based coordinates)", + "help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n" + , + "default": "False" + } + + + , + "depth_zero": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates)", + "help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n" + , + "default": "False" + } + + + , + "bed_graph": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format", + "help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n" + , + "default": "False" + } + + + , + "bed_graph_zero_coverage": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg)", + "help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \"grep -w 0$\" to the output.\n" + , + "default": "False" + } + + + , + "split": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals", + "help_text": "Type: `boolean_true`, default: `false`. 
Treat \"split\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n" + , + "default": "False" + } + + + , + "ignore_deletion": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage", + "help_text": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage.\n" + , + "default": "False" + } + + + , + "strand": { + "type": + "string", + "description": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand", + "help_text": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n", + "enum": ["+", "-"] + + + } + + + , + "pair_end_coverage": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments.\nWorks for BAM files only\n" + , + "default": "False" + } + + + , + "fragment_size": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n", + "help_text": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n" + , + "default": "False" + } + + + , + "du": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n", + "help_text": "Type: `boolean_true`, default: `false`. Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n" + , + "default": "False" + } + + + , + "five_prime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval)", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval).\n" + , + "default": "False" + } + + + , + "three_prime": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval)", + "help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval).\n" + , + "default": "False" + } + + + , + "max": { + "type": + "integer", + "description": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram", + "help_text": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n" + + } + + + , + "scale": { + "type": + "number", + "description": "Type: `double`. Scale the coverage by a constant factor", + "help_text": "Type: `double`. Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n" + + } + + + , + "trackline": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output", + "help_text": "Type: `boolean_true`, default: `false`. 
Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n" + , + "default": "False" + } + + + , + "trackopts": { + "type": + "string", + "description": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line", + "help_text": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts \u0027name=\"My Track\" visibility=2 color=255,30,30\u0027\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml new file mode 100644 index 0000000..04db226 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/.config.vsh.yaml @@ -0,0 +1,367 @@ +name: "fastqc" +version: "main" +authors: +- name: "Theodoro Gasperin Terra Camargo" + roles: + - "author" + - "maintainer" + info: + links: + email: "theodorogtc@gmail.com" + github: "tgaspe" + linkedin: "theodoro-gasperin-terra-camargo" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "FASTQ file(s) to be analyzed.\n" + info: null + example: + - "input.fq" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Outputs" + description: "At least one of the output options (--html, --zip, --summary, --data)\ + \ must be used.\n" + arguments: + - type: "file" + name: "--html" + description: "Create the HTML report of the results. \n'*' wild card must be provided\ + \ in the output file name. \nWild card will be replaced by the input file basename.\n\ + e.g. 
\n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\ + \ html file named sample_1.html\n" + info: null + example: + - "*.html" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--zip" + description: "Create the zip file(s) containing: html report, data, images, icons,\ + \ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\ + \ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\ + \n --html \"*.zip\"\n would create an output zip file named sample_1.zip\n" + info: null + example: + - "*.zip" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--summary" + description: "Create the summary file(s).\n'*' wild card must be provided in the\ + \ output file name.\nWild card will be replaced by the input basename.\ne.g.\ + \ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\ + \ an output summary.txt file named sample_1_summary.txt\n" + info: null + example: + - "*_summary.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--data" + description: "Create the data file(s).\n'*' wild card must be provided in the\ + \ output file name.\nWild card will be replaced by the input basename.\ne.g.\ + \ \n --input \"sample_1.fq\"\n --summary \"*_data.txt\"\n would create an\ + \ output data.txt file named sample_1_data.txt\n" + info: null + example: + - "*_data.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: true + multiple_sep: ";" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--casava" + description: "Files come from raw casava output. 
Files in the same sample\ngroup\ + \ (differing only by the group number) will be analysed\nas a set rather than\ + \ individually. Sequences with the filter\nflag set in the header will be excluded\ + \ from the analysis.\nFiles must have the same names given to them by casava\n\ + (including being gzipped and ending with .gz) otherwise they\nwon't be grouped\ + \ together correctly.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nano" + description: "Files come from nanopore sequences and are in fast5 format. In\n\ + this mode you can pass in directories to process and the program\nwill take\ + \ in all fast5 files within those directories and produce\na single output file\ + \ from the sequences found in all files.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nofilter" + description: "If running with --casava then don't remove read flagged by\ncasava\ + \ as poor quality when performing the QC analysis.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--nogroup" + description: "Disable grouping of bases for reads >50bp. \nAll reports will show\ + \ data for every base in the read. \nWARNING: Using this option will cause fastqc\ + \ to crash \nand burn if you use it on really long reads, and your \nplots may\ + \ end up a ridiculous size. You have been warned!\n" + info: null + direction: "input" + - type: "integer" + name: "--min_length" + description: "Sets an artificial lower limit on the length of the \nsequence to\ + \ be shown in the report. As long as you \nset this to a value greater or equal\ + \ to your longest \nread length then this will be the sequence length used \n\ + to create your read groups. 
This can be useful for making\ndirectly comparable\ + \ statistics from datasets with somewhat \nvariable read lengths.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--format" + alternatives: + - "-f" + description: "Bypasses the normal sequence file format detection and \nforces\ + \ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\ + \ sam_mapped, and fastq.\n" + info: null + example: + - "bam" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--contaminants" + alternatives: + - "-c" + description: "Specifies a non-default file which contains the list \nof contaminants\ + \ to screen overrepresented sequences against. \nThe file must contain sets\ + \ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\ + \ a hash will be ignored.\n" + info: null + example: + - "contaminants.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--adapters" + alternatives: + - "-a" + description: "Specifies a non-default file which contains the list of \nadapter\ + \ sequences which will be explicitly searched against \nthe library. The file\ + \ must contain sets of named adapters \nin the form name[tab]sequence. Lines\ + \ prefixed with a hash will be ignored.\n" + info: null + example: + - "adapters.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--limits" + alternatives: + - "-l" + description: "Specifies a non-default file which contains \na set of criteria\ + \ which will be used to determine \nthe warn/error limits for the various modules.\ + \ \nThis file can also be used to selectively remove \nsome modules from the\ + \ output altogether. 
The format \nneeds to mirror the default limits.txt file\ + \ found in \nthe Configuration folder.\n" + info: null + example: + - "limits.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--kmers" + alternatives: + - "-k" + description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\ + \ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\ + \ specified.\n" + info: null + example: + - 7 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Suppress all progress messages on stdout and only report errors.\n" + info: null + direction: "input" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "FastQC - A high throughput sequence QC analysis tool." +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "Quality control" +- "BAM" +- "SAM" +- "FASTQ" +license: "GPL-3.0, Apache-2.0" +links: + repository: "https://github.com/s-andrews/FastQC" + homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" + documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/" + issue_tracker: "https://github.com/s-andrews/FastQC/issues" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" 
+ mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "biocontainers/fastqc:v0.11.9_cv8" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/fastqc/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/fastqc" + executable: "target/nextflow/fastqc/main.nf" + 
viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: "biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf new file mode 100644 index 0000000..39ed05c --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf @@ -0,0 +1,3827 @@ +// fastqc main +// +// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
//
// Component authors:
//  * Theodoro Gasperin Terra Camargo (author, maintainer)

////////////////////////////
// VDSL3 helper functions //
////////////////////////////

// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf'
/**
 * Raised by _checkArgumentType when a value cannot be coerced to the type
 * declared for its argument. The individual message parts are kept as
 * properties so callers can re-wrap them (see the List[...] handling in
 * _checkArgumentType).
 */
class UnexpectedArgumentTypeException extends Exception {
  String errorIdentifier
  String stage
  String plainName
  String expectedClass
  String foundClass

  // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""}
  UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
    super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " +
      "Expected type: ${expectedClass}. Found type: ${foundClass}")
    this.errorIdentifier = errorIdentifier
    this.stage = stage
    this.plainName = plainName
    this.expectedClass = expectedClass
    this.foundClass = foundClass
  }
}

/**
 * Checks if the given value is of the expected type. If not, an exception is thrown.
 * Where a safe conversion exists (e.g. String -> Integer, File -> Path) the
 * value is converted first, and the converted value is what gets returned.
 *
 * @param stage The stage of the argument (input or output)
 * @param par The parameter definition
 * @param value The value to check
 * @param errorIdentifier The identifier to use in the error message
 * @return The value, if it is of the expected type
 * @throws UnexpectedArgumentTypeException If the value is not of the expected type
*/
def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) {
  // expectedClass will only be != null if value is not of the expected type
  def expectedClass = null
  def foundClass = null

  // todo: split if need be

  if (!par.required && value == null) {
    // optional arguments may be left unset
    expectedClass = null
  } else if (par.multiple) {
    if (value !instanceof Collection) {
      value = [value]
    }

    // split strings
    value = value.collectMany{ val ->
      if (val instanceof String) {
        // collect() to ensure that the result is a List and not simply an array
        val.split(par.multiple_sep).collect()
      } else {
        [val]
      }
    }

    // process globs
    if (par.type == "file" && par.direction == "input") {
      value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten()
    }

    // check types of elements in list
    try {
      value = value.collect { listVal ->
        // re-check every element as if it were a single-valued argument
        _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier)
      }
    } catch (UnexpectedArgumentTypeException e) {
      expectedClass = "List[${e.expectedClass}]"
      foundClass = "List[${e.foundClass}]"
    }
  } else if (par.type == "string") {
    // cast to string if need be
    if (value instanceof GString) {
      value = value.toString()
    }
    expectedClass = value instanceof String ? null : "String"
  } else if (par.type == "integer") {
    // cast to integer if need be
    if (value instanceof String) {
      try {
        value = value.toInteger()
      } catch (NumberFormatException e) {
        // do nothing; the instanceof check below reports the type error
      }
    }
    if (value instanceof java.math.BigInteger) {
      value = value.intValue()
    }
    expectedClass = value instanceof Integer ? null : "Integer"
  } else if (par.type == "long") {
    // cast to long if need be
    if (value instanceof String) {
      try {
        value = value.toLong()
      } catch (NumberFormatException e) {
        // do nothing; the instanceof check below reports the type error
      }
    }
    if (value instanceof Integer) {
      value = value.toLong()
    }
    expectedClass = value instanceof Long ? null : "Long"
  } else if (par.type == "double") {
    // cast to double if need be
    if (value instanceof String) {
      try {
        value = value.toDouble()
      } catch (NumberFormatException e) {
        // do nothing; the instanceof check below reports the type error
      }
    }
    if (value instanceof java.math.BigDecimal) {
      value = value.doubleValue()
    }
    if (value instanceof Float) {
      value = value.toDouble()
    }
    expectedClass = value instanceof Double ? null : "Double"
  } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
    // cast to boolean if need be
    if (value instanceof String) {
      def valueLower = value.toLowerCase()
      if (valueLower == "true") {
        value = true
      } else if (valueLower == "false") {
        value = false
      }
    }
    expectedClass = value instanceof Boolean ? null : "Boolean"
  } else if (par.type == "file" && (par.direction == "input" || stage == "output")) {
    // cast to path if need be
    if (value instanceof String) {
      value = file(value, hidden: true)
    }
    if (value instanceof File) {
      value = value.toPath()
    }
    expectedClass = value instanceof Path ? null : "Path"
  } else if (par.type == "file" && stage == "input" && par.direction == "output") {
    // an output file passed as an input is still a path template, so it must be a string
    // cast to string if need be
    if (value instanceof GString) {
      value = value.toString()
    }
    expectedClass = value instanceof String ? null : "String"
  } else {
    // didn't find a match for par.type
    expectedClass = par.type
  }

  if (expectedClass != null) {
    if (foundClass == null) {
      foundClass = value.getClass().getName()
    }
    throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass)
  }

  return value
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf'
/**
 * Validate and coerce the input values of one event.
 * Skipped entirely on a stub run; the inputs are then returned untouched.
 *
 * @param inputs Map of argument plain name to value
 * @param config The processed Viash config (must provide allArguments)
 * @param id     Event id, only used in error messages
 * @param key    Module key, only used in error messages
 * @return The inputs with every value passed through _checkArgumentType
 */
Map _processInputValues(Map inputs, Map config, String id, String key) {
  if (!workflow.stubRun) {
    config.allArguments.each { arg ->
      if (arg.required) {
        assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
          "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
      }
    }

    inputs = inputs.collectEntries { name, value ->
      // output *files* are accepted here too: their path can be provided as an input
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
      assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument"

      value = _checkArgumentType("input", par, value, "in module '$key' id '$id'")

      [ name, value ]
    }
  }
  return inputs
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
/**
 * Validate and coerce the output values of one event.
 * Skipped entirely on a stub run; the outputs are then returned untouched.
 *
 * @param outputs Map of argument plain name to value
 * @param config  The processed Viash config (must provide allArguments)
 * @param id      Event id, only used in error messages
 * @param key     Module key, only used in error messages
 * @return The outputs with every value passed through _checkArgumentType
 */
Map _processOutputValues(Map outputs, Map config, String id, String key) {
  if (!workflow.stubRun) {
    config.allArguments.each { arg ->
      if (arg.direction == "output" && arg.required) {
        assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
          "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
      }
    }

    outputs = outputs.collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
      assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"

      value = _checkArgumentType("output", par, value, "in module '$key' id '$id'")

      [ name, value ]
    }
  }
  return outputs
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
/**
 * Lock-guarded set of ids that have been seen so far.
 */
class IDChecker {
  final def items = [] as Set

  /** Record an id. Returns false if it was already present, true otherwise. */
  @groovy.transform.WithWriteLock
  boolean observe(String item) {
    if (items.contains(item)) {
      return false
    } else {
      items << item
      return true
    }
  }

  /** Whether the id has been observed before. */
  @groovy.transform.WithReadLock
  boolean contains(String item) {
    return items.contains(item)
  }

  /** A copy of the observed ids; callers cannot modify the internal set through it. */
  @groovy.transform.WithReadLock
  Set getItems() {
    return items.clone()
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf'

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets; the id is the first element of each.
 */
// NOTE(review): the generic type was reduced to `List>>` in this copy;
// restored to the presumed original - confirm against upstream viash.
private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // unique(false) returns a de-duplicated copy. The plain unique() mutates
  // ppIds in place, which made the failure message print the already
  // de-duplicated list and thereby hide the offending duplicate ids.
  assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf'

// helper functions for reading params from file //
/**
 * Resolve `child` against the directory of `parent`.
 * A child that contains "://" or is an absolute path is returned unchanged.
 */
def _getChild(parent, child) {
  if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) {
    child
  } else {
    def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString()
    // drop the file name of the parent, keep the trailing slash
    parentAbsolute.replaceAll('/[^/]*$', "/") + child
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf'
/**
  * Figure out the param list format based on the file extension
  *
  * Anything that is not a String is reported as "asis"; a String without a
  * recognised extension is treated as an inline yaml blob.
  *
  * @param param_list A String containing the path to the parameter list file.
  *
  * @return A String containing the format of the parameter list file.
  */
def _paramListGuessFormat(param_list) {
  if (param_list !instanceof String) {
    "asis"
  } else if (param_list.endsWith(".csv")) {
    "csv"
  } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
    "json"
  } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
    "yaml"
  } else {
    "yaml_blob"
  }
}


/**
  * Read the param list
  *
  * @param param_list One of the following:
  *   - A String containing the path to the parameter list file (csv, json or yaml),
  *   - A yaml blob of a list of maps (yaml_blob),
  *   - Or a groovy list of maps (asis).
  * @param config A Map of the Viash configuration.
  *
  * @return A List of [id, Map] pairs containing the parameters.
  */
def _parseParamList(param_list, Map config) {
  // first determine format by extension
  def paramListFormat = _paramListGuessFormat(param_list)

  // only file-based formats have a location that relative paths can be resolved against
  def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ?
    file(param_list, hidden: true) :
    null

  // get the correct parser function for the detected params_list format
  def paramSets = []
  if (paramListFormat == "asis") {
    paramSets = param_list
  } else if (paramListFormat == "yaml_blob") {
    paramSets = readYamlBlob(param_list)
  } else if (paramListFormat == "yaml") {
    paramSets = readYaml(paramListPath)
  } else if (paramListFormat == "json") {
    paramSets = readJson(paramListPath)
  } else if (paramListFormat == "csv") {
    paramSets = readCsv(paramListPath)
  } else {
    error "Format of provided --param_list not recognised.\n" +
    "Found: '$paramListFormat'.\n" +
    "Expected: a csv file, a json file, a yaml file,\n" +
    "a yaml blob or a groovy list of maps."
  }

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // id is argument
  def idIsArgument = config.allArguments.any{it.plainName == "id"}

  // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2
  // (the 'id' key is only kept inside the map when the component declares an --id argument)
  paramSets = paramSets.collect({ data ->
    def id = data.id
    if (!idIsArgument) {
      data = data.findAll{k, v -> k != "id"}
    }
    [id, data]
  })

  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, data ->
    data = _splitParams(data, config)
    [id, data]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListPath) {
    paramSets = paramSets.collect({ id, data ->
      def new_data = data.collectEntries{ parName, parValue ->
        def par = config.allArguments.find{it.plainName == parName}
        if (par && par.type == "file" && par.direction == "input") {
          if (parValue instanceof Collection) {
            parValue = parValue.collectMany{path ->
              // the resolver may return one value or a collection; normalise to a list
              def x = _resolveSiblingIfNotAbsolute(path, paramListPath)
              x instanceof Collection ? x : [x]
            }
          } else {
            parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath)
          }
        }
        [parName, parValue]
      }
      [id, new_data]
    })
  }

  return paramSets
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf'
/**
 * Split the values of arguments declared with 'multiple: true' on their separator.
 *
 * Arguments that are not declared in the config are passed through untouched.
 * For single-valued arguments a one-element collection is unwrapped; a longer
 * collection fails an assertion.
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator.
 */
Map _splitParams(Map parValues, Map config){
  return parValues.collectEntries { parName, parValue ->
    def spec = config.allArguments.find { arg -> arg.plainName == parName }

    // unknown argument: hand it back exactly as received
    if (!spec) {
      return [parName, parValue]
    }

    def result = parValue
    if (spec.multiple) {
      def sep = spec.multiple_sep
      def pieces
      if (result instanceof Collection) {
        pieces = result.collect { item -> item instanceof String ? item.split(sep) : item }
      } else if (result instanceof String) {
        pieces = result.split(sep)
      } else if (result == null) {
        pieces = []
      } else {
        pieces = [ result ]
      }
      // collapse the per-item splits into a single flat list
      result = pieces.flatten()
    } else if (result instanceof Collection) {
      // single-valued argument: tolerate a list of exactly one element
      assert result.size() == 1 :
      "Error: argument ${parName} has too many values.\n" +
      " Expected amount: 1. Found: ${result.size()}"
      result = result[0]
    }

    [parName, result]
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf'
/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the param_list argument which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets. Possible formats are
 *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
 *     which correspond to the different arguments of this pipeline. A json or a yaml
 *     file should be a list of maps, each of which has keys corresponding to the
 *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
 *     When passing a csv, json or yaml, relative path names are relativized to the
 *     location of the parameter file.
 *   - Combine the parameter sets into a vdsl3 Channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A list of parameters with the first element of the event being
 *         the event ID and the second element containing a map of the parsed parameters.
 */
// NOTE(review): the generic return type was reduced to `List>>` in this copy;
// restored to the presumed original - confirm against upstream viash.
private List<Tuple2<String, Map<String, String>>> _paramsToParamSets(Map params, Map config){
  // todo: fetch key from run args
  def key_ = config.name

  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  def globalID = params.get("id", null)

  /* process params_list arguments */
  /*********************************/
  def paramList = params.containsKey("param_list") && params.param_list != null ?
    params.param_list : []
  // if (paramList instanceof String) {
  //   paramList = [paramList]
  // }
  // def paramSets = paramList.collectMany{ _parseParamList(it, config) }
  // TODO: be able to process param_list when it is a list of strings
  def paramSets = _parseParamList(paramList, config)
  if (paramSets.isEmpty()) {
    // no param_list: fall back to one event built from the global params only
    paramSets = [[null, [:]]]
  }

  /* combine arguments into channel */
  /**********************************/
  def processedParams = paramSets.indexed().collect{ index, tup ->
    // Process ID: the id from the param_list entry wins over the global --id
    def id = tup[0] ?: globalID

    if (workflow.stubRun && !id) {
      // if stub run, explicitly add an id if missing
      id = "stub${index}"
    }
    assert id != null: "Each parameter set should have at least an 'id'"

    // Process params: param_list values override the global ones
    def parValues = globalParams + tup[1]
    // // Remove parameters which are null, if the default is also null
    // parValues = parValues.collectEntries{paramName, paramValue ->
    //   parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
    //   if ( paramValue != null || parameterSettings.get("default", null) != null ) {
    //     [paramName, paramValue]
    //   }
    // }
    parValues = parValues.collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
      // the assert already rejects unknown arguments, so no separate
      // `par == null` fallback is needed after it
      assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument"

      value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")

      [ name, value ]
    }

    [id, parValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}
/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *               provides a list of arguments that can be split up into multiple events
 *               in the output channel. Possible formats of param_lists are: a csv file,
 *               json file, a yaml file or a yaml blob. Each parameter set (event) must
 *               have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple whose
 *         first element is the ID of the event and whose second element holds a parameter map.
 */
def channelFromParams(Map params, Map config) {
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf'
/**
 * Build a filter step that checks the first element (the id) of every tuple
 * for duplicates.
 *
 * @param args Optional settings:
 *   - stopOnError (default: true): when true, a duplicate id raises an error;
 *     when false, a warning is logged and the duplicate tuple is dropped.
 *
 * @return The result of `filter` applied with the duplicate check.
 */
def checkUniqueIds(Map args) {
  // The branches of this ternary used to be swapped: an unset option yielded
  // null (treated as false) and any explicit value, including `false`,
  // yielded true. `?.` also tolerates a call without an options map.
  def stopOnError = args?.stopOnError == null ? true : args.stopOnError

  def idChecker = new IDChecker()

  return filter { tup ->
    if (!idChecker.observe(tup[0])) {
      if (stopOnError) {
        error "Duplicate id: ${tup[0]}"
      } else {
        log.warn "Duplicate id: ${tup[0]}, removing duplicate entry"
        return false
      }
    }
    return true
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf'
// This helper file will be deprecated soon
// Script-level flag so the deprecation warning is printed only once.
preprocessInputsDeprecationWarningPrinted = false

/** Print the preprocessInputs() deprecation warning to stderr, once. */
def preprocessInputsDeprecationWarning() {
  if (!preprocessInputsDeprecationWarningPrinted) {
    preprocessInputsDeprecationWarningPrinted = true
    System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.")
  }
}
/**
 * Generate a nextflow Workflow that allows processing a channel of
 * Vdsl3 formatted events and apply a Viash config to them:
 *    - Gather default parameters from the Viash config and make
 *      sure that they are correctly formatted (see applyConfig method).
 *    - Format the input parameters (also using the applyConfig method).
 *    - Apply the default parameter to the input parameters.
 *    - Do some assertions:
 *      ~ Check if the event IDs in the channel are unique.
 *
 * NOTE(review): the body below only merges defaults and type-checks values;
 * no uniqueness check is visible in this function - confirm whether the
 * description above is still accurate.
 *
 * The events in the channel are formatted as tuples, with the
 * first element of the tuples being a unique id of the parameter set,
 * and the second element containing the parameters themselves.
 * Optional extra elements of the tuples will be passed to the output as is.
 *
 * @param args A map that must contain a 'config' key that points
 *              to a parsed config (see readConfig()). Optionally, a
 *              'key' key can be provided which can be used to create a unique
 *              name for the workflow process.
 *
 * @return A workflow that allows processing a channel of Vdsl3 formatted events
 * and apply a Viash config to them.
 */
def preprocessInputs(Map args) {
  preprocessInputsDeprecationWarning()

  def config = args.config
  assert config instanceof Map :
    "Error in preprocessInputs: config must be a map. " +
    "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
  // the key is only used in error messages; falls back to the component name
  def key_ = args.key ?: config.name

  // Get different parameter types (used throughout this function)
  def defaultArgs = config.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  map { tup ->
    def id = tup[0]
    def data = tup[1]
    def passthrough = tup.drop(2)

    // values supplied in the event override the defaults
    def new_data = (defaultArgs + data).collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }

      // keys that do not match a declared argument are kept without a type check
      if (par != null) {
        value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")
      }

      [ name, value ]
    }

    [ id, new_data ] + passthrough
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf'
/**
 * Run a list of components on a stream of data.
 *
 * Deprecated: a warning pointing to runEach is logged on every call.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optional filtering of the events, per component
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optional rewriting of the id; an id_ that is neither a String nor a
      // Closure leaves the original id in place
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map form: component argument name -> key in the state
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          // insert the component input after the id; the original state stays
          // in the tuple as the next element
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          // stays null when toState_ is not a Map, List or Closure
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          // replace output and old state by the merged state
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure, called with the id, the data and the component itself.
 *   Events for which it does not return a truthy value skip the component and
 *   are appended to the output unchanged.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optional filtering of the events, per component
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optional rewriting of the id; the resulting id must be a String
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // split the events into those that run the component and those that bypass it
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map form: component argument name -> key in the state
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          // insert the component input after the id; the original state stays
          // in the tuple as the next element
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          // stays null when toState_ is not a Map, List or Closure
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          // replace output and old state by the merged state
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // events that skipped the component are appended after the processed ones
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
/**
 * Join sourceChannel to targetChannel
 *
 * This function joins the sourceChannel to the targetChannel.
 * However, each id in the targetChannel must be present in the
 * sourceChannel. If _meta.join_id exists in the targetChannel, that is
 * used as an id instead. If the id doesn't match any id in the sourceChannel,
 * an error is thrown.
 *
 * NOTE(review): no handling of _meta.join_id is visible in this function;
 * presumably the caller rewrites the id beforehand - confirm.
 */

def safeJoin(targetChannel, sourceChannel, key) {
  def sourceIDs = new IDChecker()

  // record every id that passes through the source channel
  def sourceCheck = sourceChannel
    | map { tup ->
      sourceIDs.observe(tup[0])
      tup
    }
  // NOTE(review): targetCheck is not referenced again; the cross() below is
  // applied to targetChannel directly - confirm this is intended.
  def targetCheck = targetChannel
    | map { tup ->
      def id = tup[0]

      if (!sourceIDs.contains(id)) {
        error (
          "Error in module '${key}' when merging output with original state.\n" +
          " Reason: output with id '${id}' could not be joined with source channel.\n" +
          " If the IDs in the output channel differ from the input channel,\n" +
          " please set `tup[1]._meta.join_id to the original ID.\n" +
          " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" +
          " Unexpected ID in the output channel: '${id}'.\n" +
          " Example input event: [\"id\", [input: file(...)]],\n" +
          " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]"
        )
      }
      // TODO: add link to our documentation on how to fix this

      tup
    }

  sourceCheck.cross(targetChannel)
    | map{ left, right ->
      // target tuple first, followed by the source tuple without its id
      right + left.drop(1)
    }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf'
/**
 * Fill in the defaults of a single argument definition.
 * The map is modified in place and also returned.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  // name without its leading dashes
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""
    def extSearch = ""
    // NOTE(review): arg.default is known to be null here (see the enclosing
    // condition), so only the example can provide the extension.
    if (arg.default != null) {
      extSearch = arg.default
    } else if (arg.example != null) {
      extSearch = arg.example
    }
    if (extSearch instanceof List) {
      extSearch = extSearch[0]
    }
    // last ".ext" part of the example, if any
    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
    def ext = extSearchResult != null ? extSearchResult : ""
    // the dollar signs are escaped, so $id and $key stay literal placeholders
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  // single-valued arguments: unwrap list-valued default / example
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf'
/**
 * Merge the Nextflow-level arguments (--publish_dir, --param_list) into a
 * config and run the result through processConfig.
 */
def addGlobalArguments(config) {
  def localConfig = [
    "argument_groups": [
      [
        "name": "Nextflow input-output arguments",
        "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
        "arguments" : [
          [
            'name': '--publish_dir',
            'required': true,
            'type': 'string',
            'description': 'Path to an output directory.',
            'example': 'output/',
            'multiple': false
          ],
          [
            'name': '--param_list',
            'required': false,
            'type': 'string',
            'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
            |
            |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
            |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
            |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
            |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
            |
            |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
            'example': 'my_params.yaml',
            'multiple': false,
            'hidden': true
          ]
          // TODO: allow multiple: true in param_list?
          // TODO: allow to specify a --param_list_regex to filter the param_list?
          // TODO: allow to specify a --param_list_from_state to remap entries in the param_list?
        ]
      ]
    ]
  ]

  return processConfig(_mergeMap(config, localConfig))
}

/**
 * Merge rhs into a clone of lhs: nested maps are merged recursively,
 * collections are concatenated, any other value from rhs replaces the one in lhs.
 */
def _mergeMap(Map lhs, Map rhs) {
  return rhs.inject(lhs.clone()) { map, entry ->
    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
      map[entry.key] = _mergeMap(map[entry.key], entry.value)
    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
      map[entry.key] += entry.value
    } else {
      map[entry.key] = entry.value
    }
    return map
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf'
/**
 * Render the help text of one argument: its name, the non-empty named
 * properties (type, default, example, choices, min, max) and the wrapped
 * description.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are listed after the type, only when they apply
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  def dflt = null
  if (param.default != null) {
    if (param.default instanceof List) {
      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      dflt = param.default.toString()
    }
  }
  def example = null
  if (param.example != null) {
    if (param.example instanceof List) {
      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
    } else {
      example = param.example.toString()
    }
  }
  def min = param.min?.toString()
  def max = param.max?.toString()

  // quote a choice when it contains a comma or was changed by the escaping
  def escapeChoice = { choice ->
    def s1 = choice.replaceAll("\\n", "\\\\n")
    def s2 = s1.replaceAll("\"", """\\\"""")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
/**
 * Render the complete help text of a component: name and version,
 * description, usage and one section per argument group.
 */
def _generateHelp(config) {
  def fun = config

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // group members may be given by name; look those up, drop the ones not found
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

// based on Format._paragraphWrap
/**
 * Greedy word wrap: every "\n"-separated paragraph of str is broken into
 * lines, starting a new line when adding the next word would exceed maxLength.
 * Returns the list of lines.
 */
def _paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    def word = null
    def line = words.pop()
    while(!words.isEmpty()) {
      word = words.pop()
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    // the loop above only ends once words is empty: flush the last line
    if (words.isEmpty()) {
      outLines.add(line)
    }
  }
  return outLines
}

/** Print the generated help text and exit when `params.help` is set. */
def helpMessage(config) {
  if (params.containsKey("help") && params.help) {
    def mergedConfig = addGlobalArguments(config)
    def helpStr = _generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf'
def processConfig(config) {
  // set defaults for arguments
  config.arguments =
    (config.arguments ?: []).collect{_processArgument(it)}

  // set defaults for argument_group arguments
  config.argument_groups =
    (config.argument_groups ?: []).collect{grp ->
      grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)}
      grp
    }

  // create combined arguments list
  config.allArguments =
    config.arguments +
    config.argument_groups.collectMany{it.arguments}

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.argument_groups
  if (argGroups.any{it.name.toLowerCase() == "arguments"}) {
    argGroups = argGroups.collect{ grp ->
      if (grp.name.toLowerCase() ==
"arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. 
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir :
    null
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf'

// Recurse upwards until we find a '.build.yaml' file.
// Returns the path of that file, or null when the filesystem root is reached
// without finding one.
def _findBuildYamlFile(pathPossiblySymlink) {
  // resolve symlinks first so that getParent() walks the real directory tree
  def path = pathPossiblySymlink.toRealPath()
  def child = path.resolve(".build.yaml")
  if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) {
    return child
  } else {
    def parent = path.getParent()
    if (parent == null) {
      return null
    } else {
      return _findBuildYamlFile(parent)
    }
  }
}

// get the root of the target folder,
// i.e. the directory that contains the '.build.yaml' file
def getRootDir() {
  def dir = _findBuildYamlFile(meta.resources_dir)
  assert dir != null: "Could not find .build.yaml in the folder structure"
  dir.getParent()
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf'
/**
  * Recursively apply a function over the leaves of an object.
  * Lists are mapped element-wise, Maps are mapped value-wise (keys are
  * converted with toString()), anything else is passed to the function.
  * @param obj The object to iterate over.
  * @param fun The function to apply to each value.
  * @return The object with the function applied to each value.
  */
def iterateMap(obj, fun) {
  if (obj instanceof List && obj !instanceof String) {
    return obj.collect{item ->
      iterateMap(item, fun)
    }
  } else if (obj instanceof Map) {
    return obj.collectEntries{key, item ->
      [key.toString(), iterateMap(item, fun)]
    }
  } else {
    return fun(obj)
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf'
/**
  * A view for printing the event of each channel as a YAML blob.
  * This is useful for debugging.
  */
def niceView() {
  workflow niceViewWf {
    take: input
    main:
      output = input
        | view{toYamlBlob(it)}
    emit: output
  }
  return niceViewWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf'

/**
  * Read a csv file into a list of maps.
  *
  * Lines starting with '#' are skipped. The first remaining line is the
  * header; every following line becomes one map from header name to field.
  * Fields are split on commas that are not enclosed in double quotes, and
  * surrounding double quotes are stripped. Empty fields are left out of the
  * resulting map.
  *
  * @param file_path The csv file, as a Path or as something `file()` accepts.
  *
  * @return A List of Maps, one per data row.
  */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')

  def br = java.nio.file.Files.newBufferedReader(inputFile)

  // make sure the reader is released on every exit path,
  // including a failing assertion further down
  try {
    def row = -1
    def header = null
    while (br.ready() && header == null) {
      def line = br.readLine()
      row++
      if (!line.startsWith("#")) {
        header = splitRegex.split(line, -1).collect{field ->
          // 'def' keeps the matcher local instead of leaking it into the script binding
          def m = removeQuote.matcher(field)
          m.find() ? m.replaceFirst('$1') : field
        }
      }
    }
    assert header != null: "CSV file should contain a header"

    while (br.ready()) {
      def line = br.readLine()
      row++
      if (line == null) {
        break
      }

      if (!line.startsWith("#")) {
        def predata = splitRegex.split(line, -1)
        def data = predata.collect{field ->
          // empty fields are dropped from the row map below
          if (field == "") {
            return null
          }
          def m = removeQuote.matcher(field)
          if (m.find()) {
            return m.replaceFirst('$1')
          } else {
            return field
          }
        }
        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
        output.add(dataMap)
      }
    }
  } finally {
    br.close()
  }

  output
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf'
def readJson(file_path) {
  def inputFile = file_path !instanceof Path ?
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK)
  def representer = new CustomRepresenter(options, relativizer)
  def yaml = new org.yaml.snakeyaml.Yaml(representer, options)
  return yaml.dump(data)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf'
/**
  * Serialise data to a block-style YAML string.
  * Path objects anywhere in the data are converted to plain strings first.
  */
String toYamlBlob(data) {
  def options = new org.yaml.snakeyaml.DumperOptions()
  options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK)
  options.setPrettyFlow(true)
  def yaml = new org.yaml.snakeyaml.Yaml(options)
  def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it })
  return yaml.dump(cleanData)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf'
/**
  * Write data to a file as JSON.
  *
  * @param data The object to serialise; must not be null.
  * @param file The destination; must not be null and must support `write(String)`.
  */
void writeJson(data, file) {
  // compare against null explicitly: a bare 'assert data' applies Groovy truth
  // and would also reject empty maps, empty lists, 0 and false
  assert data != null: "writeJson: data should not be null"
  assert file != null: "writeJson: file should not be null"
  file.write(toJsonBlob(data))
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf'
/**
  * Write data to a file as YAML.
  *
  * @param data The object to serialise; must not be null.
  * @param file The destination; must not be null and must support `write(String)`.
  */
void writeYaml(data, file) {
  // compare against null explicitly: a bare 'assert data' applies Groovy truth
  // and would also reject empty maps, empty lists, 0 and false
  assert data != null: "writeYaml: data should not be null"
  assert file != null: "writeYaml: file should not be null"
  file.write(toYamlBlob(data))
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf'
def findStates(Map params, Map config) {
  def auto_config = deepClone(config)
  def auto_params = deepClone(params)

  auto_config = auto_config.clone()
  // override arguments
  auto_config.argument_groups = []
  auto_config.arguments = [
    [
      type: "string",
      name: "--id",
      description: "A dummy identifier",
      required: false
    ],
    [
      type: "file",
      name: "--input_states",
      example: "/path/to/input/directory/**/state.yaml",
      description: "Path to input directory containing the datasets to be integrated.",
      required: true,
      multiple: true,
      multiple_sep: ";"
    ],
    [
      type: "string",
      name: "--filter",
      example: "foo/.*/state.yaml",
      description: "Regex to filter state files by path.",
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." 
+ key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + 
.replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean"

  // check auto.publish
  assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'"

  return auto.subMap(expectedKeys)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf'
/**
  * Assert that a map only contains expected keys and has all required keys.
  *
  * @param map The object to check; must be a Map.
  * @param expectedKeys The keys that are allowed to occur in the map.
  * @param requiredKeys The keys that must occur in the map.
  * @param mapName Name of the map, used in the assertion messages.
  */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message must interpolate the closure parameter; there is no 'key' in this scope
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}

// TODO: unit test processDirectives
def processDirectives(Map drctv) {
  // remove null values
  drctv = drctv.findAll{k, v -> v != null}

  // check for unexpected keys
  def expectedKeys = [
    "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time"
  ]
  def unexpectedKeys = drctv.keySet() - expectedKeys
  assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'"

  /* DIRECTIVE accelerator
    accepted examples:
    - [ limit: 4, type: "nvidia-tesla-k80" ]
  */
  if (drctv.containsKey("accelerator")) {
    assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator")
  }

  /* DIRECTIVE afterScript
    accepted examples:
    - "source /cluster/bin/cleanup"
  */
  if (drctv.containsKey("afterScript")) {
    assert drctv["afterScript"] instanceof CharSequence
  }

  /* DIRECTIVE beforeScript
    accepted examples:
    - "source
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time
+    accepted examples:
+    - "1h"
+    - "2days"
+    - "1day 6hours 3minutes 30seconds"
+  */
+  if (drctv.containsKey("time")) {
+    assert drctv["time"] instanceof CharSequence
+    // todo: validation regex?
+  }
+
+  return drctv
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf'
+/**
+ * Merge the arguments passed to '.run()' with the module defaults and validate the result.
+ *
+ * In order: resolves and checks 'key', rejects unknown argument names, normalises
+ * 'directives' (processDirectives) and 'auto' (processAuto), optionally adds publishDir
+ * entries for auto-publishing and transcripts, removes some directives on stub runs,
+ * checks that the map/filter/runIf hooks are closures, warns about deprecated arguments
+ * and finally processes 'fromState' and 'toState'.
+ *
+ * @param args          arguments passed by the caller; these override defaultWfArgs
+ * @param defaultWfArgs default workflow arguments of the module
+ * @param meta          module metadata; meta.config is read for the module name and arguments
+ * @return the processed workflow arguments map
+ */
+def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) {
+  // override defaults with args
+  def workflowArgs = defaultWfArgs + args
+
+  // check whether 'key' exists
+  assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument"
+
+  // if 'key' is a closure, apply it to the original key
+  if (workflowArgs["key"] instanceof Closure) {
+    workflowArgs["key"] = workflowArgs["key"](meta.config.name)
+  }
+  def key = workflowArgs["key"]
+  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
+  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"
+
+  // check for any unexpected keys
+  def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"]
+  def unexpectedKeys = workflowArgs.keySet() - expectedKeys
+  assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'"
+
+  // check whether directives exists and apply defaults
+  assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
+  assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}"
+  workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"])
+
+  // check whether auto exists and apply defaults
+  assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
+  assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}"
+  workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"])
+
+  // auto define publish, if so desired
+  // (only when no publishDir directive was set explicitly)
+  if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
+    //   " Example: params.publish_dir = \"./output/\""
+    def publishDir = getPublishDir()
+
+    if (publishDir != null) {
+      workflowArgs.directives.publishDir = [[
+        path: publishDir,
+        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
+        mode: "copy"
+      ]]
+    }
+  }
+
+  // auto define transcript, if so desired
+  if (workflowArgs.auto.transcript == true) {
+    // can't assert at this level thanks to the no_publish profile
+    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
+    //   "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
+    //   " Example: params.transcripts_dir = \"./transcripts/\""
+    // first matching parameter wins; the publish dir variants get a '_transcripts' subdirectory
+    def transcriptsDir =
+      params.containsKey("transcripts_dir") ? params.transcripts_dir :
+      params.containsKey("transcriptsDir") ? params.transcriptsDir :
+      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
+      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
+      null
+    if (transcriptsDir != null) {
+      def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
+      def transcriptsPublishDir = [
+        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
+        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
+        mode: "copy"
+      ]
+      // append to the existing publishDir entries, or start a new list if there are none
+      def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : []
+      workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir
+    }
+  }
+
+  // if this is a stubrun, remove certain directives?
+  if (workflow.stubRun) {
+    workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
+  }
+
+  // the hook arguments may be absent or null, but if set they have to be closures
+  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) {
+    if (workflowArgs.containsKey(nam) && workflowArgs[nam]) {
+      assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}"
+    }
+  }
+
+  // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well?
+  for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) {
+    if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) {
+      log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead."
+    }
+  }
+
+  // check fromState
+  workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config)
+
+  // check toState
+  workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config)
+
+  // return output
+  return workflowArgs
+}
+
+/**
+ * Normalise the 'fromState' argument of '.run()'.
+ *
+ * Accepts null, a Closure, a Map or a List. A List of names is turned into an identity
+ * Map, and a Map (module argument name -> state key) is turned into a Closure. That
+ * closure takes a tuple whose second element is the state Map and returns the data Map
+ * for the module. It throws an Exception when the state key mapped to a required input
+ * argument is missing; missing optional arguments are left out.
+ * Null and Closures are returned unchanged.
+ *
+ * @param fromState the value passed as 'fromState'
+ * @param key_      module key, only used in error messages
+ * @param config_   module config; allArguments is read to find the required input arguments
+ * @return null or a Closure
+ */
+def _processFromState(fromState, key_, config_) {
+  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
+    "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
+  if (fromState == null) {
+    return null
+  }
+
+  // if fromState is a List, convert to map
+  if (fromState instanceof List) {
+    // check whether fromstate is a list[string]
+    assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings"
+    fromState = fromState.collectEntries{[it, it]}
+  }
+
+  // if fromState is a map, convert to closure
+  if (fromState instanceof Map) {
+    // check whether fromstate is a map[string, string]
+    assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings"
+    assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings"
+    def fromStateMap = fromState.clone()
+    // read the config that was passed in; argument directions are stored in lowercase
+    // ("input" / "output"), so compare case-insensitively
+    def requiredInputNames = config_.allArguments
+      .findAll{it.required && "input".equalsIgnoreCase(it.direction as String)}
+      .collect{it.plainName}
+    // turn the map into a closure to be used later on
+    fromState = { it ->
+      def state = it[1]
+      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
+      def data = fromStateMap.collectMany{newkey, origkey ->
+        // newkey is the module argument name, origkey is the key looked up in the state
+        if (state.containsKey(origkey)) {
+          [[newkey, state[origkey]]]
+        } else if (!requiredInputNames.contains(newkey.toString())) {
+          // optional argument which is not in the state: leave it out
+          []
+        } else {
+          throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state")
+        }
+      }.collectEntries()
+      data
+    }
+  }
+
+  return fromState
+}
+
+/**
+ * Normalise the 'toState' argument of '.run()'.
+ *
+ * Null is replaced by a closure returning the second element of its tuple. A List of
+ * names is turned into an identity Map, and a Map (state key -> module output name) is
+ * turned into a Closure. That closure reads the output Map (element 1) and the state Map
+ * (element 2) of its tuple and returns the state with the mapped output entries added.
+ * It throws an Exception when a mapped required output argument is missing from the
+ * output; missing optional outputs are left out.
+ *
+ * @param toState the value passed as 'toState'
+ * @param key_    module key, only used in error messages
+ * @param config_ module config; allArguments is read to find the required output arguments
+ * @return a Closure
+ */
+def _processToState(toState, key_, config_) {
+  if (toState == null) {
+    toState = { tup -> tup[1] }
+  }
+
+  // toState should be a closure, map[string, string], or list[string]
+  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
+    "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"
+
+  // if toState is a List, convert to map
+  if (toState instanceof List) {
+    // check whether toState is a list[string]
+    assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings"
+    toState = toState.collectEntries{[it, it]}
+  }
+
+  // if toState is a map, convert to closure
+  if (toState instanceof Map) {
+    // check whether toState is a map[string, string]
+    assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings"
+    assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings"
+    def toStateMap = toState.clone()
+    // argument directions are stored in lowercase ("input" / "output"), so compare
+    // case-insensitively
+    def requiredOutputNames = config_.allArguments
+      .findAll{it.required && "output".equalsIgnoreCase(it.direction as String)}
+      .collect{it.plainName}
+    // turn the map into a closure to be used later on
+    toState = { it ->
+      def output = it[1]
+      def state = it[2]
+      assert output instanceof Map : "Error in module '$key_': the output is not a Map"
+      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
+      def extraEntries = toStateMap.collectMany{newkey, origkey ->
+        // newkey is the key in the new state, origkey is the module output argument name
+        if (output.containsKey(origkey)) {
+          [[newkey, output[origkey]]]
+        } else if (!requiredOutputNames.contains(origkey.toString())) {
+          // optional output which was not produced: leave it out
+          []
+        } else {
+          throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output")
+        }
+      }.collectEntries()
+      state + extraEntries
+    }
+  }
+
+  return toState
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf'
+// Pipeline step for debugging: when workflowArgs.debug is set, print every tuple together
+// with the process key and the given debugKey label; otherwise pass the items on unchanged.
+def _debug(workflowArgs, debugKey) {
+  if (workflowArgs.debug) {
+    view { "process '${workflowArgs.key}' $debugKey tuple: $it" }
+  } else {
+    map { it }
+  }
+}
+
+// depends on: innerWorkflowFactory
+def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
+  def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
+  def key_ = workflowArgs["key"]
+
+  workflow workflowInstance {
+    take: input_
+
+    main:
+    def chModified = input_
+      | checkUniqueIds([:])
+      | _debug(workflowArgs, "input")
+      | map { tuple ->
+        tuple = deepClone(tuple)
+
+        if (workflowArgs.map) {
+          tuple = workflowArgs.map(tuple)
+        }
+        if (workflowArgs.mapId) {
+          tuple[0] = workflowArgs.mapId(tuple[0])
+        }
+        if (workflowArgs.mapData) {
+          tuple[1] = workflowArgs.mapData(tuple[1])
+        }
+        if (workflowArgs.mapPassthrough) {
+          tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2))
+        }
+
+        // check tuple
+        assert tuple instanceof List :
+          "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}"
+        assert tuple.size() >= 2 :
+          "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Found: tuple.size() == ${tuple.size()}"
+
+        // check id field
+        if (tuple[0] instanceof GString) {
+          tuple[0] = tuple[0].toString()
+        }
+        assert tuple[0] instanceof CharSequence :
+          "Error in module '${key_}': first element of tuple in channel should be a String\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Found: ${tuple[0]}"
+
+        // match file to input file
+        // (a bare Path or List is wrapped into a map keyed by the module's only file input)
+        if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
+          def inputFiles = meta.config.allArguments
+            .findAll { it.type == "file" && it.direction == "input" }
+
+          assert inputFiles.size() == 1 :
+              "Error in module '${key_}' id '${tuple[0]}'.\n" +
+              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
+              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"
+
+          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
+        }
+
+        // check data field
+        assert tuple[1] instanceof Map :
+          "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
+          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
+          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"
+
+        // rename keys of data field in tuple
+        if (workflowArgs.renameKeys) {
+          assert workflowArgs.renameKeys instanceof Map :
+              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
+              " Example: renameKeys: ['new_key': 'old_key'].\n" +
+              " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}"
+          assert tuple[1] instanceof Map :
+              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
+              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"
+
+          // TODO: allow renameKeys to be a function?
+          workflowArgs.renameKeys.each { newKey, oldKey ->
+            assert newKey instanceof CharSequence :
+              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
+              " Example: renameKeys: ['new_key': 'old_key'].\n" +
+              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
+            assert oldKey instanceof CharSequence :
+              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
+              " Example: renameKeys: ['new_key': 'old_key'].\n" +
+              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
+            // the module key is held in 'key_'; workflowFactory does not define 'key'
+            assert tuple[1].containsKey(oldKey) :
+              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
+              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
+            tuple[1].put(newKey, tuple[1][oldKey])
+          }
+          tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
+        }
+        tuple
+      }
+
+
+    // split the channel: tuples for which runIf holds are processed, the others are
+    // passed through and concatenated to the output at the end
+    def chRun = null
+    def chPassthrough = null
+    if (workflowArgs.runIf) {
+      def runIfBranch = chModified.branch{ tup ->
+        run: workflowArgs.runIf(tup[0], tup[1])
+        passthrough: true
+      }
+      chRun = runIfBranch.run
+      chPassthrough = runIfBranch.passthrough
+    } else {
+      chRun = chModified
+      chPassthrough = Channel.empty()
+    }
+
+    def chRunFiltered = workflowArgs.filter ?
+      chRun | filter{workflowArgs.filter(it)} :
+      chRun
+
+    // derive the module arguments from the state, or keep [id, data] as is
+    def chArgs = workflowArgs.fromState ?
+      chRunFiltered | map{
+        def new_data = workflowArgs.fromState(it.take(2))
+        [it[0], new_data]
+      } :
+      chRunFiltered | map {tup -> tup.take(2)}
+
+    // fill in defaults
+    def chArgsWithDefaults = chArgs
+      | map { tuple ->
+        def id_ = tuple[0]
+        def data_ = tuple[1]
+
+        // TODO: could move fromState to here
+
+        // fetch default params from functionality
+        def defaultArgs = meta.config.allArguments
+          .findAll { it.containsKey("default") }
+          .collectEntries { [ it.plainName, it.default ] }
+
+        // fetch overrides in params
+        def paramArgs = meta.config.allArguments
+          .findAll { par ->
+            def argKey = key_ + "__" + par.plainName
+            params.containsKey(argKey)
+          }
+          .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] }
+
+        // fetch overrides in data
+        def dataArgs = meta.config.allArguments
+          .findAll { data_.containsKey(it.plainName) }
+          .collectEntries { [ it.plainName, data_[it.plainName] ] }
+
+        // combine params
+        // (later maps win: defaults < params < '.run(args: ...)' < data)
+        def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs
+
+        // remove arguments with explicit null values
+        combinedArgs
+          .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"}
+
+        combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_)
+
+        [id_, combinedArgs] + tuple.drop(2)
+      }
+
+    // TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
+    def chInitialOutput = chArgsWithDefaults
+      | _debug(workflowArgs, "processed")
+      // run workflow
+      | innerWorkflowFactory(workflowArgs)
+      // check output tuple
+      | map { id_, output_ ->
+
+        // see if output map contains metadata
+        def meta_ =
+          output_ instanceof Map && output_.containsKey("_meta") ?
+          output_["_meta"] :
+          [:]
+        def join_id = meta_.join_id ?: id_
+
+        // remove metadata
+        output_ = output_.findAll{k, v -> k != "_meta"}
+
+        // check value types
+        output_ = _processOutputValues(output_, meta.config, id_, key_)
+
+        // simplify output if need be
+        if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
+          output_ = output_.values()[0]
+        }
+
+        [join_id, id_, output_]
+      }
+      // | view{"chInitialOutput: ${it.take(3)}"}
+
+    // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
+    def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
+      // input tuple format: [join_id, id, output, prev_state, ...]
+      // output tuple format: [join_id, id, new_state, ...]
+      | map{ tup ->
+        def new_state = workflowArgs.toState(tup.drop(1).take(3))
+        tup.take(2) + [new_state] + tup.drop(4)
+      }
+
+    if (workflowArgs.auto.publish == "state") {
+      def chPublish = chNewState
+        // input tuple format: [join_id, id, new_state, ...]
+        // output tuple format: [join_id, id, new_state]
+        | map{ tup ->
+          tup.take(3)
+        }
+
+      safeJoin(chPublish, chArgsWithDefaults, key_)
+        // input tuple format: [join_id, id, new_state, orig_state, ...]
+        // output tuple format: [id, new_state, orig_state]
+        | map { tup ->
+          tup.drop(1).take(3)
+        }
+        | publishStatesByConfig(key: key_, config: meta.config)
+    }
+
+    // remove join_id and meta
+    chReturn = chNewState
+      | map { tup ->
+        // input tuple format: [join_id, id, new_state, ...]
+        // output tuple format: [id, new_state, ...]
+ tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "fastqc", + "version" : "main", + "authors" : [ + { + "name" : "Theodoro Gasperin Terra Camargo", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "theodorogtc@gmail.com", + "github" : "tgaspe", + "linkedin" : "theodoro-gasperin-terra-camargo" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "FASTQ file(s) to be analyzed.\n", + "example" : [ + "input.fq" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "description" : "At least one of the output options (--html, --zip, --summary, --data) must be used.\n", + "arguments" : [ + { + "type" : "file", + "name" : "--html", + "description" : "Create the HTML report of the results. \n'*' wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. 
\n --input \\"sample_1.fq\\"\n --html \\"*.html\\"\n would create an output html file named sample_1.html\n", + "example" : [ + "*.html" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--zip", + "description" : "Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --html \\"*.zip\\"\n would create an output zip file named sample_1.zip\n", + "example" : [ + "*.zip" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--summary", + "description" : "Create the summary file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --summary \\"*_summary.txt\\"\n would create an output summary.txt file named sample_1_summary.txt\n", + "example" : [ + "*_summary.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--data", + "description" : "Create the data file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. 
\n --input \\"sample_1.fq\\"\n --summary \\"*_data.txt\\"\n would create an output data.txt file named sample_1_data.txt\n", + "example" : [ + "*_data.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--casava", + "description" : "Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon't be grouped together correctly.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nano", + "description" : "Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nofilter", + "description" : "If running with --casava then don't remove read flagged by\ncasava as poor quality when performing the QC analysis.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--nogroup", + "description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_length", + "description" : "Sets an artificial lower limit on the length of the \nsequence to be shown in the report. 
As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--format", + "alternatives" : [ + "-f" + ], + "description" : "Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n", + "example" : [ + "bam" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--contaminants", + "alternatives" : [ + "-c" + ], + "description" : "Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n", + "example" : [ + "contaminants.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--adapters", + "alternatives" : [ + "-a" + ], + "description" : "Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. 
Lines prefixed with a hash will be ignored.\n", + "example" : [ + "adapters.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--limits", + "alternatives" : [ + "-l" + ], + "description" : "Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n", + "example" : [ + "limits.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--kmers", + "alternatives" : [ + "-k" + ], + "description" : "Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. 
Default length is 7 if not specified.\n", + "example" : [ + 7 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Suppress all progress messages on stdout and only report errors.\n", + "direction" : "input" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "FastQC - A high throughput sequence QC analysis tool.", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Quality control", + "BAM", + "SAM", + "FASTQ" + ], + "license" : "GPL-3.0, Apache-2.0", + "links" : { + "repository" : "https://github.com/s-andrews/FastQC", + "homepage" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/", + "documentation" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/", + "issue_tracker" : "https://github.com/s-andrews/FastQC/issues" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 
5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "biocontainers/fastqc:v0.11.9_cv8", + "target_registry" : "images.viash-hub.com", + "target_tag" : "main", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo \\"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\\" > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/fastqc/config.vsh.yaml", + "runner" : "nextflow", + "engine" : 
"docker|native", + "output" : "target/nextflow/fastqc", + "viash_version" : "0.9.0", + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" + }, + "package_config" : { + "name" : "biobox", + "version" : "main", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_HTML+x} ]; then echo "${VIASH_PAR_HTML}" | sed "s#'#'\\"'\\"'#g;s#.*#par_html='&'#" ; else echo "# par_html="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP+x} ]; then echo "${VIASH_PAR_ZIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip='&'#" ; else echo "# par_zip="; fi ) +$( if [ ! -z ${VIASH_PAR_SUMMARY+x} ]; then echo "${VIASH_PAR_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary='&'#" ; else echo "# par_summary="; fi ) +$( if [ !
-z ${VIASH_PAR_DATA+x} ]; then echo "${VIASH_PAR_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data='&'#" ; else echo "# par_data="; fi ) +$( if [ ! -z ${VIASH_PAR_CASAVA+x} ]; then echo "${VIASH_PAR_CASAVA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_casava='&'#" ; else echo "# par_casava="; fi ) +$( if [ ! -z ${VIASH_PAR_NANO+x} ]; then echo "${VIASH_PAR_NANO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nano='&'#" ; else echo "# par_nano="; fi ) +$( if [ ! -z ${VIASH_PAR_NOFILTER+x} ]; then echo "${VIASH_PAR_NOFILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nofilter='&'#" ; else echo "# par_nofilter="; fi ) +$( if [ ! -z ${VIASH_PAR_NOGROUP+x} ]; then echo "${VIASH_PAR_NOGROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nogroup='&'#" ; else echo "# par_nogroup="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_length='&'#" ; else echo "# par_min_length="; fi ) +$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi ) +$( if [ ! -z ${VIASH_PAR_CONTAMINANTS+x} ]; then echo "${VIASH_PAR_CONTAMINANTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminants='&'#" ; else echo "# par_contaminants="; fi ) +$( if [ ! -z ${VIASH_PAR_ADAPTERS+x} ]; then echo "${VIASH_PAR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapters='&'#" ; else echo "# par_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_LIMITS+x} ]; then echo "${VIASH_PAR_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limits='&'#" ; else echo "# par_limits="; fi ) +$( if [ ! -z ${VIASH_PAR_KMERS+x} ]; then echo "${VIASH_PAR_KMERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmers='&'#" ; else echo "# par_kmers="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +# exit on error +set -eo pipefail + +# Check if both outputs are empty, at least one must be passed. +if [[ -z "\\$par_html" ]] && [[ -z "\\$par_zip" ]] && [[ -z "\\$par_summary" ]] && [[ -z "\\$par_data" ]]; then + echo "Error: At least one of the output arguments (--html, --zip, --summary, and --data) must be passed." 
+ exit 1 +fi + +# unset flags +unset_if_false=( + par_casava + par_nano + par_nofilter + par_extract + par_noextract + par_nogroup + par_quiet +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +tmpdir=\\$(mktemp -d "\\${meta_temp_dir}/\\${meta_name}-XXXXXXXX") +function clean_up { + rm -rf "\\$tmpdir" +} +trap clean_up EXIT + +# Create input array +IFS=";" read -ra input <<< \\$par_input + +# Run fastqc +fastqc \\\\ + --extract \\\\ + \\${par_casava:+--casava} \\\\ + \\${par_nano:+--nano} \\\\ + \\${par_nofilter:+--nofilter} \\\\ + \\${par_nogroup:+--nogroup} \\\\ + \\${par_min_length:+--min_length "\\$par_min_length"} \\\\ + \\${par_format:+--format "\\$par_format"} \\\\ + \\${par_contaminants:+--contaminants "\\$par_contaminants"} \\\\ + \\${par_adapters:+--adapters "\\$par_adapters"} \\\\ + \\${par_limits:+--limits "\\$par_limits"} \\\\ + \\${par_kmers:+--kmers "\\$par_kmers"} \\\\ + \\${par_quiet:+--quiet} \\\\ + \\${meta_cpus:+--threads "\\$meta_cpus"} \\\\ + \\${meta_temp_dir:+--dir "\\$meta_temp_dir"} \\\\ + --outdir "\\${tmpdir}" \\\\ + "\\${input[@]}" + +# Move output files +for file in "\\${input[@]}"; do + # Removes everthing after the first dot of the basename + sample_name=\\$(basename "\\${file}" | sed 's/\\\\..*\\$//') + if [[ -n "\\$par_html" ]]; then + input_html="\\${tmpdir}/\\${sample_name}_fastqc.html" + html_file="\\${par_html//\\\\*/\\$sample_name}" + mv "\\$input_html" "\\$html_file" + fi + if [[ -n "\\$par_zip" ]]; then + input_zip="\\${tmpdir}/\\${sample_name}_fastqc.zip" + zip_file="\\${par_zip//\\\\*/\\$sample_name}" + mv "\\$input_zip" "\\$zip_file" + fi + if [[ -n "\\$par_summary" ]]; then + summary_file="\\${tmpdir}/\\${sample_name}_fastqc/summary.txt" + new_summary="\\${par_summary//\\\\*/\\$sample_name}" + mv "\\$summary_file" "\\$new_summary" + fi + if [[ -n "\\$par_data" ]]; then + data_file="\\${tmpdir}/\\${sample_name}_fastqc/fastqc_data.txt" + 
new_data="\\${par_data//\\\\*/\\$sample_name}" + mv "\\$data_file" "\\$new_data" + fi + # Remove the extracted directory + rm -r "\\${tmpdir}/\\${sample_name}_fastqc" +done +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + // the parentheses are required: '!out instanceof List' parses as '(!out) instanceof List', which is always false + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ +
} + + return processWf +} + +// Build the Nextflow process for a VDSL3 module; the process key gets a numeric suffix when the workflow key is already in use. +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + // concatenate into a single message: with a comma the remainder is passed as an unused format argument and never logged + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file"
&& it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! 
+ def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/fastqc", + "tag" : "main" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. 
+ // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow.config new file mode 100644 index 0000000..a90755b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'fastqc' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' + description = 'FastQC - A high throughput sequence QC analysis tool.' 
+ author = 'Theodoro Gasperin Terra Camargo' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: 
mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow_schema.json new file mode 100644 index 0000000..75ac399 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/nextflow_schema.json @@ -0,0 +1,257 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "fastqc", 
+"description": "FastQC - A high throughput sequence QC analysis tool.", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed", + "help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n" + + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n", + "properties": { + + + "html": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results", + "help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output html file named sample_1.html\n" + , + "default": "$id.$key.html_*.html" + } + + + , + "zip": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc", + "help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. 
\n --input \"sample_1.fq\"\n --html \"*.zip\"\n would create an output zip file named sample_1.zip\n" + , + "default": "$id.$key.zip_*.zip" + } + + + , + "summary": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)", + "help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create an output summary.txt file named sample_1_summary.txt\n" + , + "default": "$id.$key.summary_*.txt" + } + + + , + "data": { + "type": + "string", + "description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)", + "help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_data.txt\"\n would create an output data.txt file named sample_1_data.txt\n" + , + "default": "$id.$key.data_*.txt" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "casava": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Files come from raw casava output", + "help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. 
Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n" + , + "default": "False" + } + + + , + "nano": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format", + "help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n" + , + "default": "False" + } + + + , + "nofilter": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis", + "help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis.\n" + , + "default": "False" + } + + + , + "nogroup": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp", + "help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n" + , + "default": "False" + } + + + , + "min_length": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report", + "help_text": "Type: `integer`, example: `0`. 
Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n" + + } + + + , + "format": { + "type": + "string", + "description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format", + "help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n" + + } + + + , + "contaminants": { + "type": + "string", + "description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against", + "help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n" + + } + + + , + "adapters": { + "type": + "string", + "description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library", + "help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n" + + } + + + , + "limits": { + "type": + "string", + "description": "Type: `file`, example: `limits.txt`. 
Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules", + "help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n" + + } + + + , + "kmers": { + "type": + "integer", + "description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module", + "help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n" + + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors", + "help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n" + , + "default": "False" + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. 
Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/umitools/umitools_dedup/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml similarity index 61% rename from target/nextflow/umitools/umitools_dedup/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml index 2686ec9..a55dc37 100644 --- a/target/nextflow/umitools/umitools_dedup/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/.config.vsh.yaml @@ -1,22 +1,23 @@ -name: "umitools_dedup" -namespace: "umitools" +name: "fq_subsample" version: "main" argument_groups: - name: "Input" arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" + - type: "file" + name: "--input_1" + description: "First input fastq file to subsample. Accepts both raw and gzipped\ + \ FASTQ inputs." info: null - default: - - false - required: false + must_exist: true + create_parent: true + required: true direction: "input" multiple: false multiple_sep: ";" - type: "file" - name: "--bam" - description: "Input BAM file" + name: "--input_2" + description: "Second input fastq files to subsample. Accepts both raw and gzipped\ + \ FASTQ inputs." 
info: null must_exist: true create_parent: true @@ -24,32 +25,12 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "file" - name: "--bai" - description: "BAM index" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--get_output_stats" - description: "Whether or not to generate output stats." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Output" arguments: - type: "file" - name: "--output_bam" - description: "Deduplicated BAM file" + name: "--output_1" + description: "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." info: null - default: - - "$id.$key.bam" must_exist: true create_parent: true required: false @@ -57,51 +38,68 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--output_stats" - description: "Directory containing UMI based dedupllication statistics files" + name: "--output_2" + description: "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." info: null - default: - - "$id.umi_dedup.stats" must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" +- name: "Options" + arguments: + - type: "double" + name: "--probability" + description: "The probability a record is kept, as a percentage (0.0, 1.0). Cannot\ + \ be used with `record-count`" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--record_count" + description: "The exact number of records to keep. 
Cannot be used with `probability`" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed" + description: "Seed to use for the random number generator" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\ - \ to the read.\n" +description: "fq subsample outputs a subset of records from single or paired FASTQ\ + \ files." test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "chr19.bam" -- type: "file" - path: "chr19.bam.bai" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/umitools/dedup/main.nf" - - "modules/nf-core/umitools/dedup/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "fastq" +- "subsample" +- "subset" +license: "MIT" +links: + repository: "https://github.com/stjude-rust-labs/fq" + homepage: "https://github.com/stjude-rust-labs/fq/blob/master/README.md" + documentation: "https://github.com/stjude-rust-labs/fq/blob/master/README.md" runners: - type: "executable" id: "executable" @@ -170,56 +168,50 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "rust:1.81-slim" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - upgrade: true + - type: "docker" + run: + - "apt-get update && apt-get install -y git procps && \\\ngit clone --depth 1\ + \ --branch 
v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\ncd fq &&\ + \ \\\ncargo install --locked --path . && \\\nmv target/release/fq /usr/local/bin/\ + \ && \\\ncd / && rm -rf /fq\n" entrypoint: [] cmd: null - type: "native" id: "native" build_info: - config: "src/umitools/umitools_dedup/config.vsh.yaml" + config: "src/fq_subsample/config.vsh.yaml" runner: "nextflow" engine: "docker|native" - output: "target/nextflow/umitools/umitools_dedup" - executable: "target/nextflow/umitools/umitools_dedup/main.nf" + output: "target/nextflow/fq_subsample" + executable: "target/nextflow/fq_subsample/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: 
"https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/fq_subsample/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf similarity index 96% rename from target/nextflow/fq_subsample/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf index ce49995..b0a41ef 100644 --- a/target/nextflow/fq_subsample/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf @@ -2812,22 +2812,21 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--input", - "description" : "Input fastq files to subsample", + "name" : "--input_1", + "description" : "First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs.", "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "input", - "multiple" : true, + "multiple" : false, "multiple_sep" : ";" }, { - "type" : "string", - "name" : "--extra_args", - "description" : "Extra arguments to pass to fq subsample", - "default" : [ - "" - ], + "type" : "file", + "name" : "--input_2", + "description" : "Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs.", + "must_exist" : true, + "create_parent" : true, "required" : false, "direction" : "input", "multiple" : false, @@ -2836,15 +2835,12 @@ meta = [ ] }, { - "name" : "Input", + "name" : "Output", "arguments" : [ { "type" : "file", "name" : "--output_1", - "description" : "Sampled read 1 fastq files", - "default" : [ - "$id.read_1.subsampled.fastq" - ], + "description" : "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`.", "must_exist" : true, "create_parent" : true, "required" : false, @@ -2855,11 +2851,8 @@ meta = [ { "type" : "file", "name" : "--output_2", - "description" : "Sampled read 2 fastq files", - "default" : [ - "$id.read_2.subsampled.fastq" - ], - "must_exist" : false, + "description" : "Sampled read 2 fastq files. 
Output will be gzipped if ends in `.gz`.", + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2867,6 +2860,38 @@ meta = [ "multiple_sep" : ";" } ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "double", + "name" : "--probability", + "description" : "The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--record_count", + "description" : "The exact number of records to keep. Cannot be used with `probability`", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Seed to use for the random number generator", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] } ], "resources" : [ @@ -2876,7 +2901,7 @@ meta = [ "is_executable" : true } ], - "description" : "fq subsample outputs a subset of records from single or paired FASTQ files. 
This requires a seed (--seed) to be set in ext.args\n", + "description" : "fq subsample outputs a subset of records from single or paired FASTQ files.", "test_resources" : [ { "type" : "bash_script", @@ -2885,43 +2910,26 @@ meta = [ }, { "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/fq/subsample/main.nf", - "modules/nf-core/fq/subsample/meta.yml" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "fastq", + "subsample", + "subset" ], + "license" : "MIT", + "links" : { + "repository" : "https://github.com/stjude-rust-labs/fq", + "homepage" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md", + "documentation" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md" + }, "runners" : [ { "type" : "executable", @@ -3000,7 +3008,7 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "rust:1.81-slim", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", @@ -3008,10 +3016,7 @@ meta = [ { "type" : "docker", "run" : [ - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\napt-get update && \\\\\napt-get install -y --no-install-recommends build-essential git-all curl && \\\\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\\\n. 
\\"$HOME/.cargo/env\\" && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\nmv fq /usr/local/ && cd /usr/local/fq && \\\\\ncargo install --locked --path . && \\\\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n" - ], - "env" : [ - "TZ=Europe/Brussels" + "apt-get update && apt-get install -y git procps && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\ncd fq && \\\\\ncargo install --locked --path . && \\\\\nmv target/release/fq /usr/local/bin/ && \\\\\ncd / && rm -rf /fq\n" ] } ] @@ -3025,46 +3030,36 @@ meta = [ "config" : "/workdir/root/repo/src/fq_subsample/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/fq_subsample", + "output" : "target/nextflow/fq_subsample", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 
'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3078,12 +3073,17 @@ def innerWorkflowFactory(args) { def rawScript = '''set -e tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN +#!/bin/bash + ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_1+x} ]; then echo "${VIASH_PAR_INPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_1='&'#" ; else echo "# par_input_1="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_2+x} ]; then echo "${VIASH_PAR_INPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_2='&'#" ; else echo "# par_input_2="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi ) +$( if [ ! -z ${VIASH_PAR_PROBABILITY+x} ]; then echo "${VIASH_PAR_PROBABILITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_probability='&'#" ; else echo "# par_probability="; fi ) +$( if [ ! 
-z ${VIASH_PAR_RECORD_COUNT+x} ]; then echo "${VIASH_PAR_RECORD_COUNT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_record_count='&'#" ; else echo "# par_record_count="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3104,29 +3104,27 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) ## VIASH END -#!/bin/bash set -eo pipefail -IFS=";" read -ra input <<< \\$par_input -n_fastq=\\${#input[@]} -required_args=("-p" "--probability" "-n" "--read-count") -for arg in "\\${required_args[@]}"; do - if [[ "\\$par_extra_args" == *"\\$arg"* ]]; then - echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args" - exit 1 - fi -done +required_args=("-p" "--probability" "-n" "--record_count") -if [ \\$n_fastq -eq 1 ]; then - fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1 -elif [ \\$n_fastq -eq 2 ]; then - fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1 --r2-dst \\$par_output_2 -else - echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!" 
+# exclusive OR for required arguments \\$par_probability and \\$par_record_count +if [[ -n \\$par_probability && -n \\$par_record_count ]] || [[ -z \\$par_probability && -z \\$par_record_count ]]; then + echo "FQ/SUBSAMPLE requires either --probability or --record_count to be specified" exit 1 fi + + +fq subsample \\\\ + \\${par_output_1:+--r1-dst "\\${par_output_1}"} \\\\ + \\${par_output_2:+--r2-dst "\\${par_output_2}"} \\\\ + \\${par_probability:+--probability "\\${par_probability}"} \\\\ + \\${par_record_count:+--record-count "\\${par_record_count}"} \\\\ + \\${par_seed:+--seed "\\${par_seed}"} \\\\ + \\${par_input_1} \\\\ + \\${par_input_2} VIASHMAIN bash "$tempscript" ''' @@ -3487,7 +3485,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/fq_subsample", + "image" : "vsh/biobox/fq_subsample", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/fq_subsample/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow.config similarity index 97% rename from target/nextflow/fq_subsample/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow.config index ac07eca..82746d4 100644 --- a/target/nextflow/fq_subsample/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow.config @@ -3,7 +3,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n' + description = 'fq subsample outputs a subset of records from single or paired FASTQ files.' 
} process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/umitools/umitools_dedup/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json similarity index 63% rename from target/nextflow/umitools/umitools_dedup/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json index 199a96d..ace623d 100644 --- a/target/nextflow/umitools/umitools_dedup/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", -"title": "umitools_dedup", -"description": "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", +"title": "fq_subsample", +"description": "fq subsample outputs a subset of records from single or paired FASTQ files.", "type": "object", "definitions": { @@ -14,42 +14,21 @@ "properties": { - "paired": { - "type": - "boolean", - "description": "Type: `boolean`, default: `false`. Paired fastq files or not?", - "help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?" - , - "default":false - } - - - , - "bam": { + "input_1": { "type": "string", - "description": "Type: `file`. Input BAM file", - "help_text": "Type: `file`. Input BAM file" + "description": "Type: `file`, required. First input fastq file to subsample", + "help_text": "Type: `file`, required. First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs." } , - "bai": { + "input_2": { "type": "string", - "description": "Type: `file`. BAM index", - "help_text": "Type: `file`. BAM index" - - } - - - , - "get_output_stats": { - "type": - "boolean", - "description": "Type: `boolean`. Whether or not to generate output stats", - "help_text": "Type: `boolean`. Whether or not to generate output stats." + "description": "Type: `file`. Second input fastq files to subsample", + "help_text": "Type: `file`. 
Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs." } @@ -65,24 +44,64 @@ "properties": { - "output_bam": { + "output_1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file", - "help_text": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file" + "description": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`." , - "default":"$id.$key.output_bam.bam" + "default": "$id.$key.output_1.output_1" } , - "output_stats": { + "output_2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files", - "help_text": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files" + "description": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files", + "help_text": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`." , - "default":"$id.$key.output_stats.stats" + "default": "$id.$key.output_2.output_2" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "probability": { + "type": + "number", + "description": "Type: `double`. The probability a record is kept, as a percentage (0", + "help_text": "Type: `double`. The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`" + + } + + + , + "record_count": { + "type": + "integer", + "description": "Type: `integer`. The exact number of records to keep", + "help_text": "Type: `integer`. The exact number of records to keep. 
Cannot be used with `probability`" + + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`. Seed to use for the random number generator", + "help_text": "Type: `integer`. Seed to use for the random number generator" + } @@ -130,6 +149,10 @@ "$ref": "#/definitions/output" }, + { + "$ref": "#/definitions/options" + }, + { "$ref": "#/definitions/nextflow input-output arguments" } diff --git a/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml similarity index 59% rename from target/nextflow/kallisto/kallisto_index/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml index cdb6b2e..659174a 100644 --- a/target/nextflow/kallisto/kallisto_index/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/.config.vsh.yaml @@ -5,18 +5,22 @@ argument_groups: - name: "Input" arguments: - type: "file" - name: "--transcriptome_fasta" + name: "--input" + description: "Path to a FASTA-file containing the transcriptome sequences, either\ + \ in plain text or \ncompressed (.gz) format.\n" info: null must_exist: true create_parent: true - required: false + required: true direction: "input" multiple: false multiple_sep: ";" - - type: "integer" - name: "--pseudo_aligner_kmer_size" - description: "Kmer length passed to indexing step of pseudoaligners." 
+ - type: "file" + name: "--d_list" + description: "Path to a FASTA-file containing sequences to mask from quantification.\n" info: null + must_exist: true + create_parent: true required: false direction: "input" multiple: false @@ -24,9 +28,9 @@ argument_groups: - name: "Output" arguments: - type: "file" - name: "--kallisto_index" + name: "--index" info: null - default: + example: - "Kallisto_index" must_exist: true create_parent: true @@ -34,37 +38,96 @@ argument_groups: direction: "output" multiple: false multiple_sep: ";" +- name: "Options" + arguments: + - type: "integer" + name: "--kmer_size" + description: "Kmer length passed to indexing step of pseudoaligners (default:\ + \ '31').\n" + info: null + example: + - 31 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--make_unique" + description: "Replace repeated target names with unique names.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--aa" + description: "Generate index from a FASTA-file containing amino acid sequences.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--distiguish" + description: "Generate index where sequences are distinguished by the sequence\ + \ names.\n" + info: null + direction: "input" + - type: "integer" + name: "--min_size" + alternatives: + - "-m" + description: "Length of minimizers (default: automatically chosen).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ec_max_size" + alternatives: + - "-e" + description: "Maximum number of targets in an equivalence class (default: no maximum).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--tmp" + alternatives: + - "-T" + description: "Path to a directory for temporary files.\n" + info: null + example: + - "tmp" + required: false + direction: "input" + multiple: false + 
multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Create Kallisto index.\n" +description: "Build a Kallisto index for the transcriptome to use Kallisto in the\ + \ mapping-based mode.\n" test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "transcriptome.fasta" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/kallisto/index/main.nf" - - "modules/nf-core/kallisto/index/meta.yml" - last_sha: "c0816976384d5e7ee6079c29c45958df1ffa0ee4" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "kallisto" +- "index" +license: "BSD 2-Clause License" +references: + doi: + - "https://doi.org/10.1038/nbt.3519" +links: + repository: "https://github.com/pachterlab/kallisto" + homepage: "https://pachterlab.github.io/kallisto/about" + documentation: "https://pachterlab.github.io/kallisto/manual" + issue_tracker: "https://github.com/pachterlab/kallisto/issues" runners: - type: "executable" id: "executable" @@ -155,31 +218,28 @@ build_info: output: "target/nextflow/kallisto/kallisto_index" executable: "target/nextflow/kallisto/kallisto_index/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: 
"vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/kallisto/kallisto_index/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf similarity index 95% rename from target/nextflow/kallisto/kallisto_index/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf index d1bc960..fa3ac85 100644 --- a/target/nextflow/kallisto/kallisto_index/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf @@ -2813,18 +2813,21 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--transcriptome_fasta", + "name" : "--input", + "description" : "Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n", "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "input", "multiple" : false, "multiple_sep" : ";" }, { - "type" : "integer", - "name" : "--pseudo_aligner_kmer_size", - "description" : "Kmer length passed to indexing step of pseudoaligners.", + "type" : "file", + "name" : "--d_list", + "description" : "Path to a FASTA-file containing sequences to mask from quantification.\n", + "must_exist" : true, + 
"create_parent" : true, "required" : false, "direction" : "input", "multiple" : false, @@ -2837,8 +2840,8 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--kallisto_index", - "default" : [ + "name" : "--index", + "example" : [ "Kallisto_index" ], "must_exist" : true, @@ -2849,6 +2852,80 @@ meta = [ "multiple_sep" : ";" } ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--kmer_size", + "description" : "Kmer length passed to indexing step of pseudoaligners (default: '31').\n", + "example" : [ + 31 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--make_unique", + "description" : "Replace repeated target names with unique names.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--aa", + "description" : "Generate index from a FASTA-file containing amino acid sequences.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--distiguish", + "description" : "Generate index where sequences are distinguished by the sequence names.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--min_size", + "alternatives" : [ + "-m" + ], + "description" : "Length of minimizers (default: automatically chosen).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ec_max_size", + "alternatives" : [ + "-e" + ], + "description" : "Maximum number of targets in an equivalence class (default: no maximum).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--tmp", + "alternatives" : [ + "-T" + ], + "description" : "Path to a directory for temporary files.\n", + "example" : [ + "tmp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] } ], "resources" : [ @@ 
-2858,7 +2935,7 @@ meta = [ "is_executable" : true } ], - "description" : "Create Kallisto index.\n", + "description" : "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n", "test_resources" : [ { "type" : "bash_script", @@ -2867,39 +2944,31 @@ meta = [ }, { "type" : "file", - "path" : "/testData/minimal_test/reference/transcriptome.fasta" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/kallisto/index/main.nf", - "modules/nf-core/kallisto/index/meta.yml" - ], - "last_sha" : "c0816976384d5e7ee6079c29c45958df1ffa0ee4" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "kallisto", + "index" ], + "license" : "BSD 2-Clause License", + "references" : { + "doi" : [ + "https://doi.org/10.1038/nbt.3519" + ] + }, + "links" : { + "repository" : "https://github.com/pachterlab/kallisto", + "homepage" : "https://pachterlab.github.io/kallisto/about", + "documentation" : "https://pachterlab.github.io/kallisto/manual", + "issue_tracker" : "https://github.com/pachterlab/kallisto/issues" + }, "runners" : [ { "type" : "executable", @@ -3000,46 +3069,36 @@ meta = [ "config" : "/workdir/root/repo/src/kallisto/kallisto_index/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_index", + "output" : "target/nextflow/kallisto/kallisto_index", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : 
"https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3053,11 +3112,20 @@ def innerWorkflowFactory(args) { def rawScript = '''set -e tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN +#!/bin/bash + ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_fasta='&'#" ; else echo "# par_transcriptome_fasta="; fi ) -$( if [ ! 
-z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_aligner_kmer_size='&'#" ; else echo "# par_pseudo_aligner_kmer_size="; fi ) -$( if [ ! -z ${VIASH_PAR_KALLISTO_INDEX+x} ]; then echo "${VIASH_PAR_KALLISTO_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kallisto_index='&'#" ; else echo "# par_kallisto_index="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_D_LIST+x} ]; then echo "${VIASH_PAR_D_LIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_d_list='&'#" ; else echo "# par_d_list="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_KMER_SIZE+x} ]; then echo "${VIASH_PAR_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_size='&'#" ; else echo "# par_kmer_size="; fi ) +$( if [ ! -z ${VIASH_PAR_MAKE_UNIQUE+x} ]; then echo "${VIASH_PAR_MAKE_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_make_unique='&'#" ; else echo "# par_make_unique="; fi ) +$( if [ ! -z ${VIASH_PAR_AA+x} ]; then echo "${VIASH_PAR_AA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aa='&'#" ; else echo "# par_aa="; fi ) +$( if [ ! -z ${VIASH_PAR_DISTIGUISH+x} ]; then echo "${VIASH_PAR_DISTIGUISH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_distiguish='&'#" ; else echo "# par_distiguish="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_SIZE+x} ]; then echo "${VIASH_PAR_MIN_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_size='&'#" ; else echo "# par_min_size="; fi ) +$( if [ ! -z ${VIASH_PAR_EC_MAX_SIZE+x} ]; then echo "${VIASH_PAR_EC_MAX_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ec_max_size='&'#" ; else echo "# par_ec_max_size="; fi ) +$( if [ ! -z ${VIASH_PAR_TMP+x} ]; then echo "${VIASH_PAR_TMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tmp='&'#" ; else echo "# par_tmp="; fi ) $( if [ ! 
-z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3078,14 +3146,35 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) ## VIASH END -#!/bin/bash set -eo pipefail +unset_if_false=( par_make_unique par_aa par_distinguish ) + +for var in "\\${unset_if_false[@]}"; do + temp_var="\\${!var}" + [[ "\\$temp_var" == "false" ]] && unset \\$var +done + +if [ -n "\\$par_kmer_size" ]; then + if [[ "\\$par_kmer_size" -lt 1 || "\\$par_kmer_size" -gt 31 || \\$(( par_kmer_size % 2 )) -eq 0 ]]; then + echo "Error: Kmer size must be an odd number between 1 and 31." 
+ exit 1 + fi +fi + kallisto index \\\\ - \\${par_pseudo_aligner_kmer_size:+-k \\$par_pseudo_aligner_kmer_size} \\\\ - -i \\$par_kallisto_index \\\\ - \\$par_transcriptome_fasta + -i "\\${par_index}" \\\\ + \\${par_kmer_size:+--kmer-size "\\${par_kmer_size}"} \\\\ + \\${par_make_unique:+--make-unique} \\\\ + \\${par_aa:+--aa} \\\\ + \\${par_distinguish:+--distinguish} \\\\ + \\${par_min_size:+--min-size "\\${par_min_size}"} \\\\ + \\${par_ec_max_size:+--ec-max-size "\\${par_ec_max_size}"} \\\\ + \\${par_d_list:+--d-list "\\${par_d_list}"} \\\\ + \\${meta_cpus:+--threads "\\${meta_cpus}"} \\\\ + \\${par_tmp:+--tmp "\\${par_tmp}"} \\\\ + "\\${par_input}" VIASHMAIN bash "$tempscript" ''' @@ -3446,7 +3535,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/kallisto/kallisto_index", + "image" : "vsh/biobox/kallisto/kallisto_index", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/kallisto/kallisto_index/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow.config similarity index 97% rename from target/nextflow/kallisto/kallisto_index/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow.config index cbbe089..4f967d3 100644 --- a/target/nextflow/kallisto/kallisto_index/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow.config @@ -3,7 +3,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Create Kallisto index.\n' + description = 'Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/bbmap_bbsplit/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json similarity index 53% rename from 
target/nextflow/bbmap_bbsplit/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json index 450686b..bf4b2c8 100644 --- a/target/nextflow/bbmap_bbsplit/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", -"title": "bbmap_bbsplit", -"description": "Split sequencing reads by mapping them to multiple references simultaneously.\n", +"title": "kallisto_index", +"description": "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n", "type": "object", "definitions": { @@ -14,72 +14,21 @@ "properties": { - "id": { - "type": - "string", - "description": "Type: `string`. Sample ID", - "help_text": "Type: `string`. Sample ID" - - } - - - , - "paired": { - "type": - "boolean", - "description": "Type: `boolean`, default: `false`. Paired fastq files or not?", - "help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?" - , - "default":false - } - - - , "input": { "type": "string", - "description": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)", - "help_text": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)" + "description": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (", + "help_text": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n" } , - "primary_ref": { + "d_list": { "type": "string", - "description": "Type: `file`. Primary reference FASTA", - "help_text": "Type: `file`. Primary reference FASTA" - - } - - - , - "bbsplit_fasta_list": { - "type": - "string", - "description": "Type: `file`. 
Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit", - "help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit." - - } - - - , - "only_build_index": { - "type": - "boolean", - "description": "Type: `boolean`. true = only build index; false = mapping", - "help_text": "Type: `boolean`. true = only build index; false = mapping" - - } - - - , - "built_bbsplit_index": { - "type": - "string", - "description": "Type: `file`. Directory with index files", - "help_text": "Type: `file`. Directory with index files" + "description": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification", + "help_text": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification.\n" } @@ -95,35 +44,96 @@ "properties": { - "fastq_1": { + "index": { "type": "string", - "description": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1." + "description": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. ", + "help_text": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. " , - "default":"$id.$key.fastq_1.fastq" + "default": "$id.$key.index.index" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "kmer_size": { + "type": + "integer", + "description": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027)", + "help_text": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027).\n" + } , - "fastq_2": { + "make_unique": { "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastq_2.fastq`. 
Output file for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2." + "boolean", + "description": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names", + "help_text": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names.\n" , - "default":"$id.$key.fastq_2.fastq" + "default": "False" } , - "bbsplit_index": { + "aa": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences", + "help_text": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences.\n" + , + "default": "False" + } + + + , + "distiguish": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names", + "help_text": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names.\n" + , + "default": "False" + } + + + , + "min_size": { + "type": + "integer", + "description": "Type: `integer`. Length of minimizers (default: automatically chosen)", + "help_text": "Type: `integer`. Length of minimizers (default: automatically chosen).\n" + + } + + + , + "ec_max_size": { + "type": + "integer", + "description": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum)", + "help_text": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum).\n" + + } + + + , + "tmp": { "type": "string", - "description": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files", - "help_text": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files" - , - "default":"$id.$key.bbsplit_index.bbsplit_index" + "description": "Type: `string`, example: `tmp`. 
Path to a directory for temporary files", + "help_text": "Type: `string`, example: `tmp`. Path to a directory for temporary files.\n" + } @@ -171,6 +181,10 @@ "$ref": "#/definitions/output" }, + { + "$ref": "#/definitions/options" + }, + { "$ref": "#/definitions/nextflow input-output arguments" } diff --git a/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml similarity index 64% rename from target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml index 7c68ac7..db6bb9a 100644 --- a/target/nextflow/kallisto/kallisto_quant/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/.config.vsh.yaml @@ -11,84 +11,33 @@ argument_groups: info: null must_exist: true create_parent: true - required: false + required: true direction: "input" multiple: true - multiple_sep: "," - - type: "boolean" - name: "--paired" - description: "Paired reads or not." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--strandedness" - description: "Sample strand-specificity." - info: null - required: false - direction: "input" - multiple: false multiple_sep: ";" - type: "file" name: "--index" + alternatives: + - "-i" description: "Kallisto genome index." info: null must_exist: true create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--gtf" - description: "Optional gtf file for translation of transcripts into genomic coordinates." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--chromosomes" - description: "Optional tab separated file with chromosome names and lengths." 
- info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--fragment_length" - description: "For single-end mode only, the estimated average fragment length." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--fragment_length_sd" - description: "For single-end mode only, the estimated standard deviation of the\ - \ fragment length." - info: null - required: false + required: true direction: "input" multiple: false multiple_sep: ";" - name: "Output" arguments: - type: "file" - name: "--output" - description: "Kallisto quant results" + name: "--output_dir" + alternatives: + - "-o" + description: "Directory to write output to." info: null - default: - - "$id.kallisto_quant_results" must_exist: true create_parent: true - required: false + required: true direction: "output" multiple: false multiple_sep: ";" @@ -96,73 +45,114 @@ argument_groups: name: "--log" description: "File containing log information from running kallisto quant" info: null - default: - - "$id.kallisto_quant.log.txt" must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - - type: "file" - name: "--run_info" - description: "A json file containing information about the run" +- name: "Options" + arguments: + - type: "boolean_true" + name: "--single" + description: "Single end mode." + info: null + direction: "input" + - type: "boolean_true" + name: "--single_overhang" + description: "Include reads where unobserved rest of fragment is predicted to\ + \ lie outside a transcript." + info: null + direction: "input" + - type: "boolean_true" + name: "--fr_stranded" + description: "Strand specific reads, first read forward." + info: null + direction: "input" + - type: "boolean_true" + name: "--rf_stranded" + description: "Strand specific reads, first read reverse." 
+ info: null + direction: "input" + - type: "double" + name: "--fragment_length" + alternatives: + - "-l" + description: "The estimated average fragment length." info: null - default: - - "$id.run_info.json" - must_exist: true - create_parent: true required: false - direction: "output" + direction: "input" multiple: false multiple_sep: ";" - - type: "file" - name: "--quant_results_file" - description: "TSV file containing abundance estimates from Kallisto" + - type: "double" + name: "--sd" + alternatives: + - "-s" + description: "The estimated standard deviation of the fragment length (default:\ + \ -l, -s values are estimated \nfrom paired end data, but are required when\ + \ using --single).\n" info: null - default: - - "$id.abundance.tsv" - must_exist: true - create_parent: true required: false - direction: "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--plaintext" + description: "Output plaintext instead of HDF5." + info: null + direction: "input" + - type: "integer" + name: "--bootstrap_samples" + alternatives: + - "-b" + description: "Number of bootstrap samples to draw. Default: '0'\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--seed" + description: "Random seed for bootstrap. 
Default: '42'\n" + info: null + example: + - 42 + required: false + direction: "input" multiple: false multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Computes equivalence classes for reads and quantifies abundances.\n" +description: "Quantifying abundances of transcripts from RNA-Seq data, or more generally\ + \ of target sequences using high-throughput sequencing reads.\n" test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "transcriptome.fasta" -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/kallisto/quant/main.nf" - - "modules/nf-core/kallisto/quant/meta.yml" - last_sha: "aff1d2e02717247831644769fc3ba84868c3fdde" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "kallisto" +- "quant" +- "pseudoalignment" +license: "BSD 2-Clause License" +references: + doi: + - "10.1038/nbt.3519" +links: + repository: "https://github.com/pachterlab/kallisto" + homepage: "https://pachterlab.github.io/kallisto/about" + documentation: "https://pachterlab.github.io/kallisto/manual" + issue_tracker: "https://github.com/pachterlab/kallisto/issues" runners: - type: "executable" id: "executable" @@ -242,6 +232,9 @@ engines: \nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\ \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\ \ /usr/local/bin/\n" + - type: "docker" + run: + - "echo \"kallisto: $(kallisto version | sed 's/kallisto, version //')\" > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" @@ -253,31 +246,28 @@ 
build_info: output: "target/nextflow/kallisto/kallisto_quant" executable: "target/nextflow/kallisto/kallisto_quant/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/kallisto/kallisto_quant/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf similarity index 95% rename from target/nextflow/kallisto/kallisto_quant/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf index e06fd56..bec853b 100644 --- a/target/nextflow/kallisto/kallisto_quant/main.nf +++ 
b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf @@ -2817,76 +2817,21 @@ meta = [ "description" : "List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.", "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "input", "multiple" : true, - "multiple_sep" : "," - }, - { - "type" : "boolean", - "name" : "--paired", - "description" : "Paired reads or not.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "string", - "name" : "--strandedness", - "description" : "Sample strand-specificity.", - "required" : false, - "direction" : "input", - "multiple" : false, "multiple_sep" : ";" }, { "type" : "file", "name" : "--index", + "alternatives" : [ + "-i" + ], "description" : "Kallisto genome index.", "must_exist" : true, "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--gtf", - "description" : "Optional gtf file for translation of transcripts into genomic coordinates.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--chromosomes", - "description" : "Optional tab separated file with chromosome names and lengths.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--fragment_length", - "description" : "For single-end mode only, the estimated average fragment length.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--fragment_length_sd", - "description" : "For single-end mode only, the estimated standard deviation of the fragment length.", - 
"required" : false, + "required" : true, "direction" : "input", "multiple" : false, "multiple_sep" : ";" @@ -2898,14 +2843,14 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--output", - "description" : "Kallisto quant results", - "default" : [ - "$id.kallisto_quant_results" + "name" : "--output_dir", + "alternatives" : [ + "-o" ], + "description" : "Directory to write output to.", "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" @@ -2914,41 +2859,96 @@ meta = [ "type" : "file", "name" : "--log", "description" : "File containing log information from running kallisto quant", - "default" : [ - "$id.kallisto_quant.log.txt" - ], "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", "multiple" : false, "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--single", + "description" : "Single end mode.", + "direction" : "input" }, { - "type" : "file", - "name" : "--run_info", - "description" : "A json file containing information about the run", - "default" : [ - "$id.run_info.json" + "type" : "boolean_true", + "name" : "--single_overhang", + "description" : "Include reads where unobserved rest of fragment is predicted to lie outside a transcript.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--fr_stranded", + "description" : "Strand specific reads, first read forward.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--rf_stranded", + "description" : "Strand specific reads, first read reverse.", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--fragment_length", + "alternatives" : [ + "-l" ], - "must_exist" : true, - "create_parent" : true, + "description" : "The estimated average fragment length.", "required" : false, - "direction" : "output", + "direction" : "input", "multiple" : false, 
"multiple_sep" : ";" }, { - "type" : "file", - "name" : "--quant_results_file", - "description" : "TSV file containing abundance estimates from Kallisto", - "default" : [ - "$id.abundance.tsv" + "type" : "double", + "name" : "--sd", + "alternatives" : [ + "-s" ], - "must_exist" : true, - "create_parent" : true, + "description" : "The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n", "required" : false, - "direction" : "output", + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--plaintext", + "description" : "Output plaintext instead of HDF5.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--bootstrap_samples", + "alternatives" : [ + "-b" + ], + "description" : "Number of bootstrap samples to draw. Default: '0'\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Random seed for bootstrap. 
Default: '42'\n", + "example" : [ + 42 + ], + "required" : false, + "direction" : "input", "multiple" : false, "multiple_sep" : ";" } @@ -2962,7 +2962,7 @@ meta = [ "is_executable" : true } ], - "description" : "Computes equivalence classes for reads and quantifies abundances.\n", + "description" : "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n", "test_resources" : [ { "type" : "bash_script", @@ -2971,47 +2971,32 @@ meta = [ }, { "type" : "file", - "path" : "/testData/minimal_test/reference/transcriptome.fasta" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/kallisto/quant/main.nf", - "modules/nf-core/kallisto/quant/meta.yml" - ], - "last_sha" : "aff1d2e02717247831644769fc3ba84868c3fdde" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "kallisto", + "quant", + "pseudoalignment" ], + "license" : "BSD 2-Clause License", + "references" : { + "doi" : [ + "10.1038/nbt.3519" + ] + }, + "links" : { + "repository" : "https://github.com/pachterlab/kallisto", + "homepage" : "https://pachterlab.github.io/kallisto/about", + "documentation" : "https://pachterlab.github.io/kallisto/manual", + "issue_tracker" : "https://github.com/pachterlab/kallisto/issues" + }, "runners" : [ { "type" : "executable", @@ -3100,6 +3085,12 @@ meta = [ "run" : [ "apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate 
https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n" ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"kallisto: $(kallisto version | sed 's/kallisto, version //')\\" > /var/software_versions.txt\n" + ] } ] }, @@ -3112,46 +3103,36 @@ meta = [ "config" : "/workdir/root/repo/src/kallisto/kallisto_quant/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_quant", + "output" : "target/nextflow/kallisto/kallisto_quant", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 
'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3165,20 +3146,23 @@ def innerWorkflowFactory(args) { def rawScript = '''set -e tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN +#!/bin/bash + ## VIASH START # The following code has been auto-generated by Viash. $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) $( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) -$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_CHROMOSOMES+x} ]; then echo "${VIASH_PAR_CHROMOSOMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chromosomes='&'#" ; else echo "# par_chromosomes="; fi ) -$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi ) -$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_sd='&'#" ; else echo "# par_fragment_length_sd="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) $( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) -$( if [ ! -z ${VIASH_PAR_RUN_INFO+x} ]; then echo "${VIASH_PAR_RUN_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_info='&'#" ; else echo "# par_run_info="; fi ) -$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results_file='&'#" ; else echo "# par_quant_results_file="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE+x} ]; then echo "${VIASH_PAR_SINGLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single='&'#" ; else echo "# par_single="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE_OVERHANG+x} ]; then echo "${VIASH_PAR_SINGLE_OVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_overhang='&'#" ; else echo "# par_single_overhang="; fi ) +$( if [ ! -z ${VIASH_PAR_FR_STRANDED+x} ]; then echo "${VIASH_PAR_FR_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fr_stranded='&'#" ; else echo "# par_fr_stranded="; fi ) +$( if [ ! -z ${VIASH_PAR_RF_STRANDED+x} ]; then echo "${VIASH_PAR_RF_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rf_stranded='&'#" ; else echo "# par_rf_stranded="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi ) +$( if [ ! -z ${VIASH_PAR_SD+x} ]; then echo "${VIASH_PAR_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sd='&'#" ; else echo "# par_sd="; fi ) +$( if [ ! -z ${VIASH_PAR_PLAINTEXT+x} ]; then echo "${VIASH_PAR_PLAINTEXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_plaintext='&'#" ; else echo "# par_plaintext="; fi ) +$( if [ ! 
-z ${VIASH_PAR_BOOTSTRAP_SAMPLES+x} ]; then echo "${VIASH_PAR_BOOTSTRAP_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bootstrap_samples='&'#" ; else echo "# par_bootstrap_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3199,46 +3183,46 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) ## VIASH END -#!/bin/bash set -eo pipefail -IFS="," read -ra input <<< \\$par_input +unset_if_false=( par_single par_single_overhang par_rf_stranded par_fr_stranded par_plaintext ) -single_end_params='' -if [ \\$par_paired == "false" ]; then - if [[ \\$par_fragment_length < 0 ]] || [[ ! \\$fragment_length_sd < 0 ]]; then - echo "fragment_length and fragment_length_sd must be set for single-end data" +for var in "\\${unset_if_false[@]}"; do + temp_var="\\${!var}" + [[ "\\$temp_var" == "false" ]] && unset \\$var +done + +IFS=";" read -ra input <<< \\$par_input + +# Check if par_single is not set and ensure even number of input files +if [ -z "\\$par_single" ]; then + if [ \\$((\\${#input[@]} % 2)) -ne 0 ]; then + echo "Error: When running in paired-end mode, the number of input files must be even." 
+ echo "Number of input files provided: \\${#input[@]}" exit 1 fi - single_end_params="--single --fragment-length \\$par_fragment_length --sd \\$par_fragment_length_sd" fi -strandedness='' -if [[ "\\$par_extra_args" != *"--fr-stranded"* ]] && [[ "\\$par_extra_args" != *"--rf-stranded"* ]]; then - if [ "\\$par_strandedness" == 'forward' ]; then - strandedness='--fr-stranded' - elif [ "\\$par_strandedness" == 'reverse' ]; then - strandedness='--rf-stranded' - fi -fi -mkdir -p \\$par_output +mkdir -p \\$par_output_dir + kallisto quant \\\\ \\${meta_cpus:+--threads \\$meta_cpus} \\\\ - --index \\$par_index \\\\ - \\${par_gtf:+--gtf \\$par_gtf} \\\\ - \\${par_chromosomes:+--chromosomes \\$par_chromosomes} \\\\ - \\$single_end_params \\\\ - \\$strandedness \\\\ - \\$par_extra_args \\\\ - -o \\$par_output \\\\ - \\${input[*]} 2> >(tee -a \\${par_output}/kallisto_quant.log >&2) - -mv \\${par_output}/kallisto_quant.log \\${par_log} -mv \\${par_output}/run_info.json \\${par_run_info} -cp \\${par_output}/abundance.tsv \\${par_quant_results_file} + -i \\$par_index \\\\ + \\${par_gtf:+--gtf "\\${par_gtf}"} \\\\ + \\${par_single:+--single} \\\\ + \\${par_single_overhang:+--single-overhang} \\\\ + \\${par_fr_stranded:+--fr-stranded} \\\\ + \\${par_rf_stranded:+--rf-stranded} \\\\ + \\${par_plaintext:+--plaintext} \\\\ + \\${par_bootstrap_samples:+--bootstrap-samples "\\${par_bootstrap_samples}"} \\\\ + \\${par_fragment_length:+--fragment-length "\\${par_fragment_length}"} \\\\ + \\${par_sd:+--sd "\\${par_sd}"} \\\\ + \\${par_seed:+--seed "\\${par_seed}"} \\\\ + -o \\$par_output_dir \\\\ + \\${input[*]} 2> >(tee -a \\$par_log >&2) VIASHMAIN bash "$tempscript" ''' @@ -3599,7 +3583,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/kallisto/kallisto_quant", + "image" : "vsh/biobox/kallisto/kallisto_quant", "tag" : "main" }, "tag" : "$id" diff --git 
a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow.config new file mode 100644 index 0000000..145313e --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'kallisto/kallisto_quant' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' + description = 'Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + 
withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { 
cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json new file mode 100644 index 0000000..260d9ca --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/nextflow_schema.json @@ -0,0 +1,225 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "kallisto_quant", +"description": "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively", + "help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively." + + } + + + , + "index": { + "type": + "string", + "description": "Type: `file`, required. Kallisto genome index", + "help_text": "Type: `file`, required. Kallisto genome index." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "output_dir": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to", + "help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to." 
+ , + "default": "$id.$key.output_dir.output_dir" + } + + + , + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant", + "help_text": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant" + , + "default": "$id.$key.log.log" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "single": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Single end mode", + "help_text": "Type: `boolean_true`, default: `false`. Single end mode." + , + "default": "False" + } + + + , + "single_overhang": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript", + "help_text": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript." + , + "default": "False" + } + + + , + "fr_stranded": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward", + "help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward." + , + "default": "False" + } + + + , + "rf_stranded": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse", + "help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse." + , + "default": "False" + } + + + , + "fragment_length": { + "type": + "number", + "description": "Type: `double`. The estimated average fragment length", + "help_text": "Type: `double`. The estimated average fragment length." + + } + + + , + "sd": { + "type": + "number", + "description": "Type: `double`. 
The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single)", + "help_text": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n" + + } + + + , + "plaintext": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5", + "help_text": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5." + , + "default": "False" + } + + + , + "bootstrap_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Number of bootstrap samples to draw", + "help_text": "Type: `integer`, example: `0`. Number of bootstrap samples to draw. Default: \u00270\u0027\n" + + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`, example: `42`. Random seed for bootstrap", + "help_text": "Type: `integer`, example: `42`. Random seed for bootstrap. Default: \u002742\u0027\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/qualimap/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml similarity index 67% rename from target/nextflow/qualimap/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml index 4fcc2d4..4a1b9ad 100644 --- a/target/nextflow/qualimap/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/.config.vsh.yaml @@ -1,12 +1,30 @@ -name: "qualimap" +name: "qualimap_rnaseq" +namespace: "qualimap" version: "main" +authors: +- name: "Dorien Roosen" + roles: + - "author" + - "maintainer" + info: + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" argument_groups: - name: "Input" arguments: - type: "file" - name: "--input" - description: "path to input mapping file in BAM format." + name: "--bam" + description: "Path to the sequence alignment file in BAM format, produced by a\ + \ splicing-aware aligner." info: null + example: + - "alignment.bam" must_exist: true create_parent: true required: true @@ -15,8 +33,10 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--gtf" - description: "path to annotations file in Ensembl GTF format." + description: "Path to genomic annotations in Ensembl GTF format." 
info: null + example: + - "annotations.gtf" must_exist: true create_parent: true required: true @@ -26,11 +46,21 @@ argument_groups: - name: "Output" arguments: - type: "file" - name: "--output_dir" - description: "path to output directory for raw data and report." + name: "--qc_results" + description: "Text file containing the RNAseq QC results." + info: null + example: + - "rnaseq_qc_results.txt" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts" + description: "Output file for computed counts." info: null - default: - - "$id.qualimap_output" must_exist: true create_parent: true required: false @@ -38,48 +68,34 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--output_pdf" - description: "path to output file for pdf report." + name: "--report" + description: "Report output file. Supported formats are PDF or HTML." info: null - default: - - "$id.report.pdf" - must_exist: false + example: + - "report.html" + must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - - type: "string" - name: "--output_format" - description: "Format of the output report (PDF or HTML, default is HTML)" - info: null - default: - - "html" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Optional" arguments: - type: "integer" - name: "--pr_bases" + name: "--num_pr_bases" description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ \ bias (default = 100)." info: null - default: - - 100 required: false min: 1 direction: "input" multiple: false multiple_sep: ";" - type: "integer" - name: "--tr_bias" + name: "--num_tr_bias" description: "Number of top highly expressed transcripts to compute 5'-3' bias\ \ (default = 1000)." 
info: null - default: - - 1000 required: false min: 1 direction: "input" @@ -89,9 +105,10 @@ argument_groups: name: "--algorithm" description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)." info: null - default: - - "uniquely-mapped-reads" required: false + choices: + - "uniquely-mapped-reads" + - "proportional" direction: "input" multiple: false multiple_sep: ";" @@ -100,8 +117,6 @@ argument_groups: description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\ \ or non-strand-specific (default))." info: null - default: - - "non-strand-specific" required: false choices: - "non-strand-specific" @@ -127,8 +142,6 @@ argument_groups: name: "--java_memory_size" description: "maximum Java heap memory size, default = 4G." info: null - default: - - "4G" required: false direction: "input" multiple: false @@ -137,36 +150,33 @@ resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "RNA-seq QC analysis using the qualimap \n" +description: "Qualimap RNA-seq QC reports quality control metrics and bias estimations\ + \ \nwhich are specific for whole transcriptome sequencing, including reads genomic\ + \ \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n" test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" -- type: "file" - path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai" -- type: "file" - path: "genes.gtf" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/qualimap/rnaseq/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "RNA-seq" +- "quality 
control" +- "QC Report" +license: "GPL-2.0" +references: + doi: + - "10.1093/bioinformatics/btv566" +links: + repository: "https://bitbucket.org/kokonech/qualimap/commits/branch/master" + homepage: "http://qualimap.conesalab.org/" + documentation: "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc" + issue_tracker: "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open" runners: - type: "executable" id: "executable" @@ -235,67 +245,47 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "quay.io/biocontainers/qualimap:2.3--hdfd78af_0" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "r-base" - - "unzip" - - "wget" - - "openjdk-8-jdk" - - "libxml2-dev" - - "libcurl4-openssl-dev" - interactive: false - type: "docker" run: - - "wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip &&\ - \ \\\nunzip qualimap_v2.3.zip && \\\ncp -a qualimap_v2.3/. 
usr/bin && \\\nunset\ - \ DISPLAY && \\\nmkdir -p tmp && \\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n" - - type: "r" - cran: - - "optparse" - bioc: - - "NOISeqr" - bioc_force_install: false + - "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" id: "native" build_info: - config: "src/qualimap/config.vsh.yaml" + config: "src/qualimap/qualimap_rnaseq/config.vsh.yaml" runner: "nextflow" engine: "docker|native" - output: "target/nextflow/qualimap" - executable: "target/nextflow/qualimap/main.nf" + output: "target/nextflow/qualimap/qualimap_rnaseq" + executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + 
repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/qualimap/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf similarity index 95% rename from target/nextflow/qualimap/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf index 6890d89..0d70bed 100644 --- a/target/nextflow/qualimap/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf @@ -1,4 +1,4 @@ -// qualimap main +// qualimap_rnaseq main // // This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. +// +// Component authors: +// * Dorien Roosen (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2804,16 +2807,43 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "qualimap", + "name" : "qualimap_rnaseq", + "namespace" : "qualimap", "version" : "main", + "authors" : [ + { + "name" : "Dorien Roosen", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], "argument_groups" : [ { "name" : "Input", "arguments" : [ { "type" : "file", - "name" : "--input", - "description" : "path to input mapping file in BAM format.", + "name" : "--bam", + "description" : "Path to the sequence alignment file in BAM format, produced by a splicing-aware 
aligner.", + "example" : [ + "alignment.bam" + ], "must_exist" : true, "create_parent" : true, "required" : true, @@ -2824,7 +2854,10 @@ meta = [ { "type" : "file", "name" : "--gtf", - "description" : "path to annotations file in Ensembl GTF format.", + "description" : "Path to genomic annotations in Ensembl GTF format.", + "example" : [ + "annotations.gtf" + ], "must_exist" : true, "create_parent" : true, "required" : true, @@ -2839,13 +2872,24 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--output_dir", - "description" : "path to output directory for raw data and report.", - "default" : [ - "$id.qualimap_output" + "name" : "--qc_results", + "description" : "Text file containing the RNAseq QC results.", + "example" : [ + "rnaseq_qc_results.txt" ], "must_exist" : true, "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts", + "description" : "Output file for computed counts.", + "must_exist" : true, + "create_parent" : true, "required" : false, "direction" : "output", "multiple" : false, @@ -2853,29 +2897,17 @@ meta = [ }, { "type" : "file", - "name" : "--output_pdf", - "description" : "path to output file for pdf report.", - "default" : [ - "$id.report.pdf" + "name" : "--report", + "description" : "Report output file. 
Supported formats are PDF or HTML.", + "example" : [ + "report.html" ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", "multiple" : false, "multiple_sep" : ";" - }, - { - "type" : "string", - "name" : "--output_format", - "description" : "Format of the output report (PDF or HTML, default is HTML)", - "default" : [ - "html" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" } ] }, @@ -2884,11 +2916,8 @@ meta = [ "arguments" : [ { "type" : "integer", - "name" : "--pr_bases", + "name" : "--num_pr_bases", "description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).", - "default" : [ - 100 - ], "required" : false, "min" : 1, "direction" : "input", @@ -2897,11 +2926,8 @@ meta = [ }, { "type" : "integer", - "name" : "--tr_bias", + "name" : "--num_tr_bias", "description" : "Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).", - "default" : [ - 1000 - ], "required" : false, "min" : 1, "direction" : "input", @@ -2912,10 +2938,11 @@ meta = [ "type" : "string", "name" : "--algorithm", "description" : "Counting algorithm (uniquely-mapped-reads (default) or proportional).", - "default" : [ - "uniquely-mapped-reads" - ], "required" : false, + "choices" : [ + "uniquely-mapped-reads", + "proportional" + ], "direction" : "input", "multiple" : false, "multiple_sep" : ";" @@ -2924,9 +2951,6 @@ meta = [ "type" : "string", "name" : "--sequencing_protocol", "description" : "Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", - "default" : [ - "non-strand-specific" - ], "required" : false, "choices" : [ "non-strand-specific", @@ -2953,9 +2977,6 @@ meta = [ "type" : "string", "name" : "--java_memory_size", "description" : "maximum Java heap memory size, default = 4G.", - "default" : [ - "4G" - ], "required" : false, "direction" : "input", 
"multiple" : false, @@ -2971,7 +2992,7 @@ meta = [ "is_executable" : true } ], - "description" : "RNA-seq QC analysis using the qualimap \n", + "description" : "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n", "test_resources" : [ { "type" : "bash_script", @@ -2980,46 +3001,32 @@ meta = [ }, { "type" : "file", - "path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/genes.gtf" + "path" : "test_data/" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/qualimap/rnaseq/main.nf" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "RNA-seq", + "quality control", + "QC Report" ], + "license" : "GPL-2.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/btv566" + ] + }, + "links" : { + "repository" : "https://bitbucket.org/kokonech/qualimap/commits/branch/master", + "homepage" : "http://qualimap.conesalab.org/", + "documentation" : "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc", + "issue_tracker" : "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open" + }, "runners" : [ { "type" : "executable", @@ -3098,38 +3105,16 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : 
"quay.io/biocontainers/qualimap:2.3--hdfd78af_0", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", "setup" : [ - { - "type" : "apt", - "packages" : [ - "r-base", - "unzip", - "wget", - "openjdk-8-jdk", - "libxml2-dev", - "libcurl4-openssl-dev" - ], - "interactive" : false - }, { "type" : "docker", "run" : [ - "wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \\\\\nunzip qualimap_v2.3.zip && \\\\\ncp -a qualimap_v2.3/. usr/bin && \\\\\nunset DISPLAY && \\\\\nmkdir -p tmp && \\\\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n" + "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n" ] - }, - { - "type" : "r", - "cran" : [ - "optparse" - ], - "bioc" : [ - "NOISeqr" - ], - "bioc_force_install" : false } ] }, @@ -3139,49 +3124,39 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/qualimap/config.vsh.yaml", + "config" : "/workdir/root/repo/src/qualimap/qualimap_rnaseq/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/qualimap", + "output" : "target/nextflow/qualimap/qualimap_rnaseq", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : 
"craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3197,13 +3172,13 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) $( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_PDF+x} ]; then echo "${VIASH_PAR_OUTPUT_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_pdf='&'#" ; else echo "# par_output_pdf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) -$( if [ ! 
-z ${VIASH_PAR_PR_BASES+x} ]; then echo "${VIASH_PAR_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pr_bases='&'#" ; else echo "# par_pr_bases="; fi ) -$( if [ ! -z ${VIASH_PAR_TR_BIAS+x} ]; then echo "${VIASH_PAR_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tr_bias='&'#" ; else echo "# par_tr_bias="; fi ) +$( if [ ! -z ${VIASH_PAR_QC_RESULTS+x} ]; then echo "${VIASH_PAR_QC_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qc_results='&'#" ; else echo "# par_qc_results="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo "${VIASH_PAR_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts='&'#" ; else echo "# par_counts="; fi ) +$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_PR_BASES+x} ]; then echo "${VIASH_PAR_NUM_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_pr_bases='&'#" ; else echo "# par_num_pr_bases="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_TR_BIAS+x} ]; then echo "${VIASH_PAR_NUM_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_tr_bias='&'#" ; else echo "# par_num_tr_bias="; fi ) $( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi ) $( if [ ! -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi ) $( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) @@ -3233,20 +3208,52 @@ $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -eo pipefail -mkdir -p \\$par_output_dir +tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" qualimap_XXXXXXXXX) +# Handle output parameters +if [ -n "\\$par_report" ]; then + outfile=\\$(basename "\\$par_report") + report_extension="\\${outfile##*.}" +fi + +if [ -n "\\$par_counts" ]; then + counts=\\$(basename "\\$par_counts") +fi + +# disable flags +[[ "\\$par_paired" == "false" ]] && unset par_paired +[[ "\\$par_sorted" == "false" ]] && unset par_sorted + +# Run qualimap qualimap rnaseq \\\\ - --java-mem-size=\\$par_java_memory_size \\\\ - --algorithm \\$par_algorithm \\\\ - --num-pr-bases \\$par_pr_bases \\\\ - --num-tr-bias \\$par_tr_bias \\\\ - --sequencing-protocol \\$par_sequencing_protocol \\\\ - -bam \\$par_input \\\\ + \\${meta_memory_mb:+--java-mem-size=\\${meta_memory_mb}M} \\\\ + \\${par_algorithm:+--algorithm \\$par_algorithm} \\\\ + \\${par_sequencing_protocol:+--sequencing-protocol \\$par_sequencing_protocol} \\\\ + -bam \\$par_bam \\\\ -gtf \\$par_gtf \\\\ - \\${par_paired:+-pe} \\\\ - \\${par_sorted:+-s} \\\\ - -outdir \\$par_output_dir \\\\ - -outformat \\$par_output_format + -outdir "\\$tmp_dir" \\\\ + \\${par_num_pr_bases:+--num-pr-bases \\$par_num_pr_bases} \\\\ + \\${par_num_tr_bias:+--num-tr-bias \\$par_num_tr_bias} \\\\ + \\${par_report:+-outformat \\$report_extension} \\\\ + \\${par_paired:+--paired} \\\\ + \\${par_sorted:+--sorted} \\\\ + \\${par_report:+-outfile "\\$outfile"} \\\\ + \\${par_counts:+-oc "\\$counts"} + +# Move output files +mv "\\$tmp_dir/rnaseq_qc_results.txt" "\\$par_qc_results" + +if [ -n "\\$par_report" ] && [ \\$report_extension = "html" ]; then + mv "\\$tmp_dir/qualimapReport.html" "\\$par_report" +fi + +if [ -n "\\$par_report" ] && [ \\$report_extension = "pdf" ]; then + mv "\\$tmp_dir/\\$outfile" "\\$par_report" +fi + +if [ -n "\\$par_counts" ]; then + mv "\\$tmp_dir/\\$counts" "\\$par_counts" +fi VIASHMAIN bash "$tempscript" ''' @@ -3607,7 +3614,7 
@@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/qualimap", + "image" : "vsh/biobox/qualimap/qualimap_rnaseq", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/fastqc/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow.config similarity index 92% rename from target/nextflow/fastqc/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow.config index e672c7e..d3ec32b 100644 --- a/target/nextflow/fastqc/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow.config @@ -1,9 +1,10 @@ manifest { - name = 'fastqc' + name = 'qualimap/qualimap_rnaseq' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n' + description = 'Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n' + author = 'Dorien Roosen' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/qualimap/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json similarity index 59% rename from target/nextflow/qualimap/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json index 8defe51..aaf2e05 100644 --- a/target/nextflow/qualimap/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", 
-"title": "qualimap", -"description": "RNA-seq QC analysis using the qualimap \n", +"title": "qualimap_rnaseq", +"description": "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5\u2019-3\u2019 bias computation.\n", "type": "object", "definitions": { @@ -14,11 +14,11 @@ "properties": { - "input": { + "bam": { "type": "string", - "description": "Type: `file`, required. path to input mapping file in BAM format", - "help_text": "Type: `file`, required. path to input mapping file in BAM format." + "description": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner", + "help_text": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner." } @@ -27,8 +27,8 @@ "gtf": { "type": "string", - "description": "Type: `file`, required. path to annotations file in Ensembl GTF format", - "help_text": "Type: `file`, required. path to annotations file in Ensembl GTF format." + "description": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format", + "help_text": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format." } @@ -44,35 +44,35 @@ "properties": { - "output_dir": { + "qc_results": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report", - "help_text": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report." + "description": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. 
Text file containing the RNAseq QC results", + "help_text": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." , - "default":"$id.$key.output_dir.qualimap_output" + "default": "$id.$key.qc_results.txt" } , - "output_pdf": { + "counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report", - "help_text": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report." + "description": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts." , - "default":"$id.$key.output_pdf.pdf" + "default": "$id.$key.counts.counts" } , - "output_format": { + "report": { "type": "string", - "description": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)", - "help_text": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)" + "description": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file", + "help_text": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file. Supported formats are PDF or HTML." , - "default":"html" + "default": "$id.$key.report.html" } @@ -87,24 +87,22 @@ "properties": { - "pr_bases": { + "num_pr_bases": { "type": "integer", - "description": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)", - "help_text": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)." - , - "default":100 + "description": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)", + "help_text": "Type: `integer`. 
Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)." + } , - "tr_bias": { + "num_tr_bias": { "type": "integer", - "description": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)", - "help_text": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)." - , - "default":1000 + "description": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)", + "help_text": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)." + } @@ -112,10 +110,11 @@ "algorithm": { "type": "string", - "description": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)", - "help_text": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)." - , - "default":"uniquely-mapped-reads" + "description": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional)", + "help_text": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional).", + "enum": ["uniquely-mapped-reads", "proportional"] + + } @@ -123,12 +122,11 @@ "sequencing_protocol": { "type": "string", - "description": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))", - "help_text": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. 
Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", + "description": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))", + "help_text": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).", "enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] - , - "default":"non-strand-specific" + } @@ -139,7 +137,7 @@ "description": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads", "help_text": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads." , - "default":false + "default": "False" } @@ -150,7 +148,7 @@ "description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name", "help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only requiredfor paired-end analysis." , - "default":false + "default": "False" } @@ -158,10 +156,9 @@ "java_memory_size": { "type": "string", - "description": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G", - "help_text": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G." - , - "default":"4G" + "description": "Type: `string`. maximum Java heap memory size, default = 4G", + "help_text": "Type: `string`. maximum Java heap memory size, default = 4G." 
+ } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml new file mode 100644 index 0000000..780fa6d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml @@ -0,0 +1,879 @@ +name: "rsem_calculate_expression" +namespace: "rsem" +version: "main" +argument_groups: +- name: "Input" + arguments: + - type: "string" + name: "--id" + description: "Sample ID." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--strandedness" + description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" + info: null + required: false + choices: + - "forward" + - "reverse" + - "unstranded" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--paired" + description: "Paired-end reads or not?" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input reads for quantification." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--index" + description: "RSEM index." + info: null + must_exist: false + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extra_args" + description: "Extra rsem-calculate-expression arguments in addition to the examples." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--counts_gene" + description: "Expression counts on gene level" + info: null + example: + - "$id.genes.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--counts_transcripts" + description: "Expression counts on transcript level" + info: null + example: + - "$id.isoforms.results" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--stat" + description: "RSEM statistics" + info: null + example: + - "$id.stat" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--logs" + description: "RSEM logs" + info: null + example: + - "$id.log" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_star" + description: "BAM file generated by STAR (optional)" + info: null + example: + - "$id.STAR.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_genome" + description: "Genome BAM file (optional)" + info: null + example: + - "$id.genome.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--bam_transcript" + description: "Transcript BAM file (optional)" + info: null + example: + - "$id.transcript.bam" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sort_bam_by_read_name" + description: "Sort BAM file aligned under transcript coordidate by read 
name.\ + \ Setting this option on will produce \ndeterministic maximum likelihood estimations\ + \ from independent runs. Note that sorting will take long \ntime and lots of\ + \ memory.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--no_bam_output" + description: "Do not output any BAM file." + info: null + direction: "input" + - type: "boolean_true" + name: "--sampling_for_bam" + description: "When RSEM generates a BAM file, instead of outputting all alignments\ + \ a read has with their posterior \nprobabilities, one alignment is sampled\ + \ according to the posterior probabilities. The sampling procedure \nincludes\ + \ the alignment to the \"noise\" transcript, which does not appear in the BAM\ + \ file. Only the \nsampled alignment has a weight of 1. All other alignments\ + \ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\ + \ in the BAM file should have weight 0.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--output_genome_bam" + description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\ + \ to genomic coordinates and \nannotated with their posterior probabilities.\ + \ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\ + \ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\ + \ \nwill be generated.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--sort_bam_by_coordinate" + description: "Sort RSEM generated transcript and genome BAM files by coordinates\ + \ and build associated indices.\n" + info: null + direction: "input" +- name: "Basic Options" + arguments: + - type: "boolean_true" + name: "--no_qualities" + description: "Input reads do not contain quality scores." + info: null + direction: "input" + - type: "boolean_true" + name: "--alignments" + description: "Input file contains alignments in SAM/BAM/CRAM format. 
The exact\ + \ file format will be determined \nautomatically.\n" + info: null + direction: "input" + - type: "file" + name: "--fai" + description: "If the header section of input alignment file does not contain reference\ + \ sequence information, \nthis option should be turned on. is a FAI format\ + \ file containing each reference sequence's \nname and length. Please refer\ + \ to the SAM official website for the details of FAI format.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--bowtie2" + description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\ + \ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\ + \ are set in a way to avoid those alignments. In \nparticular, we use options\ + \ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\ + \ \nby default. The last parameter of '--score_min', '-0.1', is the negative\ + \ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\ + \ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star" + description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\ + \ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\ + \ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\ + \ with name as 'sample_name.bam'. 
Each STAR job will have its own private copy\ + \ of \nthe genome in memory.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--hisat2_hca" + description: "Use HISAT2 to align reads to the transcriptome according to Human\ + \ Cell Atlas.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--append_names" + description: "If gene_name/transcript_name is available, append it to the end\ + \ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\ + \ and 'sample_name.genes.results'.\n" + info: null + direction: "input" + - type: "integer" + name: "--seed" + description: "Set the seed for the random number generators used in calculating\ + \ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\ + \ 32 bit integer.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--single_cell_prior" + description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\ + \ mean estimates and credibility \nintervals. However, far fewer genes are expressed\ + \ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\ + \ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\ + \ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\ + \ as the prior which \nencourages the sparsity of the expression levels.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--calc_pme" + description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates."
+ info: null + direction: "input" + - type: "boolean_true" + name: "--calc_ci" + description: "Calculate 95% credibility intervals and posterior mean estimates.\ + \ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--quiet" + alternatives: + - "-q" + description: "Suppress the output of logging information." + info: null + direction: "input" +- name: "Aligner Options" + arguments: + - type: "integer" + name: "--seed_length" + description: "Seed length used by the read aligner. Providing the correct value\ + \ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\ + \ seed length parameter. Any read with its or at least \none of its mates' (for\ + \ paired-end reads) length less than this value will be ignored. If the \nreferences\ + \ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\ + \ minimum \nallowed value is 25. Note that this script will only check if the\ + \ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\ + \ 25)\n" + info: null + example: + - 25 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--phred64_quals" + description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\ + \ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\ + \ quality score will be encoded as Phred+33. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--solexa_quals" + description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\ + \ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\ + \ score will be encoded as Phred+33. (Default: false)\n" + info: null + direction: "input" + - type: "integer" + name: "--bowtie_n" + description: "(Bowtie parameter) max # of mismatches in the seed. 
(Range: 0-3,\ + \ Default: 2)\n" + info: null + example: + - 2 + required: false + choices: + - 0 + - 1 + - 2 + - 3 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_e" + description: "(Bowtie parameter) max sum of mismatch quality scores across the\ + \ alignment. (Default: 99999999)\n" + info: null + example: + - 99999999 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_m" + description: "(Bowtie parameter) suppress all alignments for a read if > \ + \ valid alignments exist. (Default: 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie_chunkmbs" + description: "(Bowtie parameter) memory allocated for best first alignment calculation\ + \ (Default: 0 - use Bowtie's default)\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--bowtie2_mismatch_rate" + description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\ + \ 0.1)\n" + info: null + example: + - 0.1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--bowtie2_k" + description: "(Bowtie 2 parameter) Find up to alignments per read. (Default:\ + \ 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--bowtie2_sensitivity_level" + description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\ + \ mode. This option controls how \nhard Bowtie 2 tries to find alignments. \ + \ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\ + . The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\ + \ \n\"--sensitive\" and \"--very-sensitive\" options. 
(Default: \"sensitive\"\ + \ - use Bowtie 2's default)\n" + info: null + example: + - "sensitive" + required: false + choices: + - "very_fast" + - "fast" + - "sensitive" + - "very_sensitive" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--star_gzipped_read_file" + description: "Input read file(s) is compressed by gzip. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star_bzipped_read_file" + description: "Input read file(s) is compressed by bzip2. (Default: false)\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--star_output_genome_bam" + description: "Save the BAM file from STAR alignment under genomic coordinate to\ + \ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\ + \ In this file, according to STAR's manual, 'paired \nends of an alignment are\ + \ always adjacent, and multiple alignments of a read are adjacent as well'.\ + \ \n(Default: false)\n" + info: null + direction: "input" +- name: "Advanced Options" + arguments: + - type: "string" + name: "--tag" + description: "The name of the optional field used in the SAM input for identifying\ + \ a read with too many valid \nalignments. The field should have the format\ + \ :i:, where a bigger than 0 \nindicates a read with\ + \ too many alignments. (Default: \"\")\n" + info: null + example: + - "" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_min" + description: "Minimum read/insert length allowed. This is also the value for the\ + \ Bowtie/Bowtie2 -I option. \n(Default: 1)\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_max" + description: "Maximum read/insert length allowed. This is also the value for the\ + \ Bowtie/Bowtie 2 -X option. 
\n(Default: 1000)\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--fragment_length_mean" + description: "(single-end data only) The mean of the fragment length distribution,\ + \ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\ + \ fragment length distribution)\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--gragment_length_sd" + description: "(single-end data only) The standard deviation of the fragment length\ + \ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\ + \ that all fragments are of the same length, \ngiven by the rounded value of\ + \ --fragment_length_mean).\n" + info: null + example: + - 0.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--estimate_rspd" + description: "Set this option if you want to estimate the read start position\ + \ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n" + info: null + direction: "input" + - type: "integer" + name: "--num_rspd_bins" + description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\ + \ is specified. Use of the default \nsetting is recommended. (Default: 20)\n" + info: null + example: + - 20 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_burnin" + description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\ + \ passes over the entire data set \nonce. If RSEM can use multiple threads,\ + \ multiple Gibbs samplers will start at the same time and all \nsamplers share\ + \ the same burn-in number. 
(Default: 200)\n" + info: null + example: + - 200 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_number_of_samples" + description: "The total number of count vectors RSEM will collect from its Gibbs\ + \ samplers. (Default: 1000)\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gibbs_sampling_gap" + description: "The number of rounds between two successive count vectors RSEM collects.\ + \ If the count vector after \nround N is collected, the count vector after round\ + \ N + <int> will also be collected. (Default: 1)\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--ci_credibility_level" + description: "The credibility level for credibility intervals. (Default: 0.95)\n" + info: null + example: + - 0.95 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--ci_number_of_samples_per_count_vector" + description: "The number of read generating probability vectors sampled per sampled\ + \ count vector. The credibility \nintervals are calculated by first sampling P(C\ + \ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\ + \ option controls how many Theta vectors are sampled per sampled count vector.\ + \ \n(Default: 50)\n" + info: null + example: + - 50 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--keep_intermediate_files" + description: "Keep temporary files generated by RSEM. RSEM creates a temporary\ + \ directory, 'sample_name.temp', \ninto which it puts all intermediate output\ + \ files. If this directory already exists, RSEM overwrites \nall files generated\ + \ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\ + temporary directory is deleted.
Set this option to prevent the deletion of this\ + \ directory and the \nintermediate files inside of it.\n" + info: null + direction: "input" + - type: "string" + name: "--temporary_folder" + description: "Set where to put the temporary files generated by RSEM. If the folder\ + \ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n" + info: null + example: + - "sample_name.temp" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--time" + description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n" + info: null + direction: "input" +- name: "Prior-Enhanced RSEM Options" + arguments: + - type: "boolean_true" + name: "--run_pRSEM" + description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\ + \ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\ + \ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\ + \ peak information to partition isoforms (e.g. in pRSEM's default \npartition\ + \ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\ + \ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\ + \ are required (with the \n'--chipseq_target_read_files ', '--chipseq_control_read_files\ + \ ', and '--bowtie_path \n options), otherwise, ChIP-seq FASTQ\ + \ files for target and control and the path to Bowtie \nexecutables are required.\n" + info: null + direction: "input" + - type: "file" + name: "--chipseq_peak_file" + description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\ + \ format. This file is used \nwhen running prior-enhanced RSEM in the default\ + \ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\ + \ overlapping with their transcription start site region or not. Each \npartition\ + \ will have its own prior parameter learned from a training set. 
This file can\ + \ be either \ngzipped or ungzipped.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_target_read_files" + description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\ + \ This option is used when running \nprior-enhanced RSEM. It provides information\ + \ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\ + \ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>'\ + \ \nand '--chipseq_control_read_files <string>' must be defined when this option\ + \ is specified.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_control_read_files" + description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq control.\ + \ This option is used when running \nprior-enhanced RSEM. It provides information\ + \ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\ + \ a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>' and \n'--chipseq_target_read_files\ + \ <string>' must be defined when this option is specified.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_read_files_multi_targets" + description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\ + \ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\ + \ is learned from multiple complementary data sets. It provides \ninformation\ + \ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\ + \ with a suffix \n'.gz' or '.gzip'.
When this option is specified, the option\ + \ '--bowtie_path ' must be defined and \nthe option '--partition_model\ + \ ' will be set to 'cmb_lgt' automatically.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--chipseq_bed_files_multi_targets" + description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\ + \ This option is used when running \nprior-enhanced RSEM, where prior is learned\ + \ from multiple complementary data sets. It provides information \nof ChIP-seq\ + \ signals and must have at least the first six BED columns. All files can be\ + \ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\ + \ is specified, the option '--partition_model \n' will be set to 'cmb_lgt'\ + \ automatically.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--cap_stacked_chipseq_reads" + description: "Keep a maximum number of ChIP-seq reads that aligned to the same\ + \ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\ + \ where prior is learned from multiple complementary data sets. This \noption\ + \ is only in use when either '--chipseq_read_files_multi_targets ' or\ + \ \n'--chipseq_bed_files_multi_targets ' is specified.\n" + info: null + direction: "input" + - type: "integer" + name: "--n_max_stacked_chipseq_reads" + description: "The maximum number of stacked ChIP-seq reads to keep. This option\ + \ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\ + \ complementary data sets. 
This option is only in use when the \noption '--cap_stacked_chipseq_reads'\ + \ is set.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--partition_model" + description: "A keyword to specify the partition model used by prior-enhanced\ + \ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\ + \ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\ + \ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\ + \ models are learned from a training set. For detailed explanations, please\ + \ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n" + info: null + example: + - "pk" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Calculate expression with RSEM. \n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "Transcriptome" +- "Index" +- "Alignment" +- "RSEM" +license: "GPL-3.0" +references: + doi: + - "https://doi.org/10.1186/1471-2105-12-323" +links: + repository: "https://github.com/deweylab/RSEM" + homepage: "https://deweylab.github.io/RSEM/" + documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + 
mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "ubuntu:22.04" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "build-essential" + - "gcc" + - "g++" + - "make" + - "wget" + - "zlib1g-dev" + - "unzip" + interactive: false + - type: "docker" + run: + - "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\ + \ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\ + \ /usr/local/bin && \\\ncd && 
\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\ + \ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n" + env: + - "STAR_VERSION=2.7.11a" + - "RSEM_VERSION=1.3.3" + - type: "docker" + run: + - "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\ + \ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\ + \ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\ + \ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\ + \ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\ + d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\ + \ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/rsem/rsem_calculate_expression/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/rsem/rsem_calculate_expression" + executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf" + viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: "biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository:
"https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf new file mode 100644 index 0000000..7961406 --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf @@ -0,0 +1,4370 @@ +// rsem_calculate_expression main +// +// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. 
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? 
null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? 
null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] 
+ } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets; fails with an assertion error listing every detected id (duplicates included) otherwise + * + * @param parameterSets a list of [id, parameters] tuples; only the first element (the id) of each tuple is inspected. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" // unique(false) does not mutate ppIds, so the message still shows the duplicates +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file.
+ */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List to List> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param paramList A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their seperator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionaly contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? args.stopOnError : true + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). 
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containg the the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optional step 1: keep only the events accepted by the filter closure
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optional step 2: replace the id by a fixed String or by the closure's result
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // step 3: derive the component input from the state. The derived data is
      // inserted after the id and the original state is kept as the next element.
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map of component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list of keys copied under the same name
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // step 4: run the component; input/output simplification is always switched off
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // optional step 5: merge the component output back into the old state.
      // tup[2] is read as the old state - presumably the run passes the extra
      // tuple element added in step 3 through unchanged; confirm against comp_.run.
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            // map of state key -> component output name
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          // drop output and old state, keep any remaining passthrough elements
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param runIf: optional closure, called with the id, the data and the component itself.
 *   Events for which it returns a falsy value skip the component and are
 *   appended unchanged to the output.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optional step 1: keep only the events accepted by the filter closure
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optional step 2: replace the id; the new id must be a String
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // optional step 3: split the events into those that run the component
      // and those that bypass it
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        // without runIf every event runs and nothing is passed through
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // step 4: derive the component input from the state. The derived data is
      // inserted after the id and the original state is kept as the next element.
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map of component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list of keys copied under the same name
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // step 5: run the component; input/output simplification is always switched off
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // optional step 6: merge the component output back into the old state.
      // tup[2] is read as the old state - presumably the run passes the extra
      // tuple element added in step 4 through unchanged; confirm against comp_.run.
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            // map of state key -> component output name
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          // drop output and old state, keep any remaining passthrough elements
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // events that skipped the component are appended after the processed ones
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
/**
 * Join sourceChannel to targetChannel
 *
 * This function joins the sourceChannel to the targetChannel.
 * However, each id in the targetChannel must be present in the
 * sourceChannel.
If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if 
(param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? 
+ "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == 
"arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. 
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + // always release the reader, also when one of the assertions below fails + try { + def row = -1 + def header = null + // the header is the first line that does not start with '#' + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + // declare the matcher locally; it used to be an undeclared (non-local) variable + def m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + // empty fields are turned into null and dropped from the row further down + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + // pair header names with values and drop the null entries + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + } finally { + br.close() + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + 
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." 
+ key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + // the yaml blob is wrapped in single quotes in the shell script below, + // so every single quote inside it must be escaped as '\'' to keep the command valid + def escapedYamlBlob = yamlBlob.replace("'", "'\\''") + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${escapedYamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the parameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + 
.replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +/** + * Assert that a map only contains expected keys and has all required keys. + * + * @param map The map to check. + * @param expectedKeys The keys that are allowed to occur in the map. + * @param requiredKeys The keys that must occur in the map. + * @param mapName Name of the map, used in the error messages. + */ +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the key that is actually missing (the closure parameter is 'requiredKey') + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source 
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf'
/**
 * Merge the arguments passed to a module's '.run()' call with the module
 * defaults, validate them, and normalise them for use by the workflow factory.
 *
 * @param args          arguments supplied by the caller of '.run()'
 * @param defaultWfArgs default workflow arguments; entries in 'args' override these
 * @param meta          module metadata; 'meta.config' is read for the module name
 *                      and the argument definitions
 * @return the merged and validated workflow arguments map, with 'directives' and
 *         'auto' processed and 'fromState' / 'toState' normalised by
 *         _processFromState / _processToState
 * Fails with an assertion error when a required entry is missing, has the
 * wrong type, or when an unknown argument name is supplied.
 */
def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) {
  // override defaults with args
  def workflowArgs = defaultWfArgs + args

  // check whether 'key' exists
  assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument"

  // if 'key' is a closure, apply it to the original key
  if (workflowArgs["key"] instanceof Closure) {
    workflowArgs["key"] = workflowArgs["key"](meta.config.name)
  }
  def key = workflowArgs["key"]
  assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}"
  assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}"

  // check for any unexpected keys
  def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"]
  def unexpectedKeys = workflowArgs.keySet() - expectedKeys
  assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'"

  // check whether directives exists and apply defaults
  assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument"
  assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. Found: class ${workflowArgs['directives'].getClass()}"
  workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"])

  // check whether auto exists and apply defaults
  assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument"
  assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}"
  workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"])

  // auto define publish, if so desired
  // (only when no publishDir directive was configured explicitly)
  if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" +
    //   " Example: params.publish_dir = \"./output/\""
    def publishDir = getPublishDir()

    if (publishDir != null) {
      workflowArgs.directives.publishDir = [[
        path: publishDir,
        saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default
        mode: "copy"
      ]]
    }
  }

  // auto define transcript, if so desired
  if (workflowArgs.auto.transcript == true) {
    // can't assert at this level thanks to the no_publish profile
    // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") :
    //   "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" +
    //   " Example: params.transcripts_dir = \"./transcripts/\""
    // first matching param wins; the publish dir variants get a '_transcripts' subdirectory
    def transcriptsDir =
      params.containsKey("transcripts_dir") ? params.transcripts_dir :
      params.containsKey("transcriptsDir") ? params.transcriptsDir :
      params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" :
      params.containsKey("publishDir") ? params.publishDir + "/_transcripts" :
      null
    if (transcriptsDir != null) {
      def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss')
      // the escaped \${...} parts are left uninterpolated here on purpose
      def transcriptsPublishDir = [
        path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/",
        saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }",
        mode: "copy"
      ]
      // append to the existing publishDir entries, or start a new list when there are none
      def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : []
      workflowArgs.directives.publishDir = publishDirs + [transcriptsPublishDir]
    }
  }

  // if this is a stubrun, remove certain directives?
  if (workflow.stubRun) {
    workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"])
  }

  // these arguments, when set to a truthy value, must be closures
  for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) {
    if (workflowArgs.containsKey(nam) && workflowArgs[nam]) {
      assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}"
    }
  }

  // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well?
  for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) {
    if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) {
      log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead."
    }
  }

  // check fromState
  workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config)

  // check toState
  workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config)

  // return output
  return workflowArgs
}
/**
 * Normalise the 'fromState' workflow argument into a closure (or null).
 *
 * Accepted forms:
 *   - null: returned unchanged
 *   - Closure: returned unchanged
 *   - List of strings: treated as a Map in which every name maps to itself
 *   - Map of string to string: [argument_name: state_key]; converted into a
 *     closure that takes a tuple [id, state] and returns a Map holding, for
 *     every state_key present in the state, argument_name -> state[state_key]
 *
 * @param fromState the user supplied value, in one of the forms above
 * @param key_      module key, only used in error messages
 * @param config_   module config; 'allArguments' is read to find required inputs
 * @return null or a Closure
 */
def _processFromState(fromState, key_, config_) {
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState == null) {
    return null
  }

  // if fromState is a List, convert to map
  if (fromState instanceof List) {
    // check whether fromstate is a list[string]
    assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings"
    fromState = fromState.collectEntries{[it, it]}
  }

  // if fromState is a map, convert to closure
  if (fromState instanceof Map) {
    // check whether fromstate is a map[string, string]
    assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings"
    assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings"
    // copy, so later changes to the caller's map do not affect the closure
    def fromStateMap = fromState.clone()
    // use the config that was passed in rather than a script-level 'meta' variable
    // NOTE(review): other code in this file compares direction against lowercase "input";
    // if directions are always lowercase this list is always empty and the exception
    // below can never be thrown. The lookup also uses the state key (origkey), not the
    // argument name (newkey). Confirm the intended behaviour before changing either.
    def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName}
    // turn the map into a closure to be used later on
    fromState = { it ->
      def state = it[1]
      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
      def data = fromStateMap.collectMany{newkey, origkey ->
        // check whether newkey corresponds to a required argument
        if (state.containsKey(origkey)) {
          [[newkey, state[origkey]]]
        } else if (!requiredInputNames.contains(origkey)) {
          // key is absent and not required: skip it
          []
        } else {
          throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state")
        }
      }.collectEntries()
      data
    }
  }

  return fromState
}
/**
 * Normalise the 'toState' workflow argument into a closure.
 *
 * A null value becomes a closure returning the second tuple element. A Closure
 * is returned as-is. A List of strings is treated as a Map in which every name
 * maps to itself. A Map [state_key: output_key] becomes a closure that takes a
 * tuple whose elements 1 and 2 are the output Map and the state Map, and
 * returns the state extended with state_key -> output[output_key] for every
 * output_key present in the output.
 *
 * @param toState the user supplied value (null, Closure, Map or List)
 * @param key_    module key, only used in error messages
 * @param config_ module config; 'allArguments' is read to find required outputs
 * @return a Closure
 */
def _processToState(toState, key_, config_) {
  // no toState given: fall back to returning the second tuple element
  if (toState == null) {
    toState = { tup -> tup[1] }
  }

  // toState should be a closure, map[string, string], or list[string]
  assert toState instanceof Closure || toState instanceof Map || toState instanceof List :
    "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}"

  // closures need no further processing
  if (toState instanceof Closure) {
    return toState
  }

  // a list[string] is shorthand for a map in which each name maps to itself
  def mapping = toState
  if (mapping instanceof List) {
    assert mapping.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings"
    mapping = mapping.collectEntries{ name -> [name, name] }
  }

  // from here on, mapping is a map; it should be a map[string, string]
  assert mapping.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings"
  assert mapping.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings"
  def toStateMap = mapping.clone()
  def requiredOutputNames = config_.allArguments
    .findAll{ arg -> arg.required && arg.direction == "Output" }
    .collect{ arg -> arg.plainName }

  // the closure that applies the mapping to a tuple
  return { tup ->
    def output = tup[1]
    def state = tup[2]
    assert output instanceof Map : "Error in module '$key_': the output is not a Map"
    assert state instanceof Map : "Error in module '$key_': the state is not a Map"

    def extraEntries = [:]
    toStateMap.each { newkey, origkey ->
      if (output.containsKey(origkey)) {
        extraEntries[newkey] = output[origkey]
      } else if (requiredOutputNames.contains(origkey)) {
        throw new Exception("Error in module '$key_': toState key '$origkey' not found in current output")
      }
      // otherwise the key is absent and not required: nothing to add
    }
    state + extraEntries
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf'
/**
 * Channel operator helper: when 'workflowArgs.debug' is truthy, print every
 * tuple with the process key and the given label; otherwise pass tuples
 * through with an identity map.
 *
 * @param workflowArgs processed workflow arguments ('debug' and 'key' are read)
 * @param debugKey     label included in the printed message
 */
def _debug(workflowArgs, debugKey) {
  if (workflowArgs.debug) {
    view { "process '${workflowArgs.key}' $debugKey tuple: $it" }
  } else {
    map { it }
  }
}

// depends on: innerWorkflowFactory
/**
 * Build the workflow that wraps this module.
 *
 * The workflow takes a channel of tuples [id, data, ...passthrough], applies
 * the optional map / mapId / mapData / mapPassthrough / renameKeys
 * transformations, validates the tuple, applies runIf and filter, derives the
 * module arguments (fromState, defaults, params overrides, 'args', data), runs
 * innerWorkflowFactory, and joins the output back onto the incoming state via
 * toState. Tuples rejected by runIf are concatenated onto the result.
 *
 * @param args          arguments passed to '.run()'
 * @param defaultWfArgs default workflow arguments
 * @param meta          module metadata ('meta.config' is read)
 * @return a copy of the workflow named after the module key, with a 'run'
 *         factory closure and the module 'config' attached via its metaClass
 */
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
  def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
  def key_ = workflowArgs["key"]

  workflow workflowInstance {
    take: input_

    main:
    def chModified = input_
      | checkUniqueIds([:])
      | _debug(workflowArgs, "input")
      | map { tuple ->
        tuple = deepClone(tuple)

        if (workflowArgs.map) {
          tuple = workflowArgs.map(tuple)
        }
        if (workflowArgs.mapId) {
          tuple[0] = workflowArgs.mapId(tuple[0])
        }
        if (workflowArgs.mapData) {
          tuple[1] = workflowArgs.mapData(tuple[1])
        }
        if (workflowArgs.mapPassthrough) {
          tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        if (tuple[0] instanceof GString) {
          tuple[0] = tuple[0].toString()
        }
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key_}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = meta.config.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key_}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          // wrap the anonymous file(s) in a map keyed by the single file input's name
          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (workflowArgs.renameKeys) {
          assert workflowArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          workflowArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            // the message uses key_ (the local module key); there is no 'key' variable in this scope
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          // drop the old names once all new names have been written
          tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }


    // split into tuples to run and tuples to pass through untouched
    def chRun = null
    def chPassthrough = null
    if (workflowArgs.runIf) {
      def runIfBranch = chModified.branch{ tup ->
        run: workflowArgs.runIf(tup[0], tup[1])
        passthrough: true
      }
      chRun = runIfBranch.run
      chPassthrough = runIfBranch.passthrough
    } else {
      chRun = chModified
      chPassthrough = Channel.empty()
    }

    def chRunFiltered = workflowArgs.filter ?
      chRun | filter{workflowArgs.filter(it)} :
      chRun

    // derive the module arguments from the state; only [id, data] is kept
    def chArgs = workflowArgs.fromState ?
      chRunFiltered | map{
        def new_data = workflowArgs.fromState(it.take(2))
        [it[0], new_data]
      } :
      chRunFiltered | map {tup -> tup.take(2)}

    // fill in defaults
    def chArgsWithDefaults = chArgs
      | map { tuple ->
        def id_ = tuple[0]
        def data_ = tuple[1]

        // TODO: could move fromState to here

        // fetch default params from functionality
        def defaultArgs = meta.config.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = meta.config.allArguments
          .findAll { par ->
            def argKey = key_ + "__" + par.plainName
            params.containsKey(argKey)
          }
          .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = meta.config.allArguments
          .findAll { data_.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data_[it.plainName] ] }

        // combine params (later maps take precedence over earlier ones)
        def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs
          .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"}

        combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_)

        [id_, combinedArgs] + tuple.drop(2)
      }

    // TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
    def chInitialOutput = chArgsWithDefaults
      | _debug(workflowArgs, "processed")
      // run workflow
      | innerWorkflowFactory(workflowArgs)
      // check output tuple
      | map { id_, output_ ->

        // see if output map contains metadata
        def meta_ =
          output_ instanceof Map && output_.containsKey("_meta") ?
          output_["_meta"] :
          [:]
        def join_id = meta_.join_id ?: id_

        // remove metadata
        output_ = output_.findAll{k, v -> k != "_meta"}

        // check value types
        output_ = _processOutputValues(output_, meta.config, id_, key_)

        // simplify output if need be
        if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
          output_ = output_.values()[0]
        }

        [join_id, id_, output_]
      }
      // | view{"chInitialOutput: ${it.take(3)}"}

    // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
    def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
      // input tuple format: [join_id, id, output, prev_state, ...]
      // output tuple format: [join_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(1).take(3))
        tup.take(2) + [new_state] + tup.drop(4)
      }

    if (workflowArgs.auto.publish == "state") {
      def chPublish = chNewState
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [join_id, id, new_state]
        | map{ tup ->
          tup.take(3)
        }

      safeJoin(chPublish, chArgsWithDefaults, key_)
        // input tuple format: [join_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(1).take(3)
        }
        | publishStatesByConfig(key: key_, config: meta.config)
    }

    // remove join_id and meta
    chReturn = chNewState
      | map { tup ->
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [id, new_state, ...]
        tup.drop(1)
      }
      | _debug(workflowArgs, "output")
      | concat(chPassthrough)

    emit: chReturn
  }

  def wf = workflowInstance.cloneWithName(key_)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs, workflowArgs, meta)
  }
  // add config to module for later introspection
  wf.metaClass.config = meta.config

  return wf
}
false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--counts_gene", + "description" : "Expression counts on gene level", + "example" : [ + "$id.genes.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--counts_transcripts", + "description" : "Expression counts on transcript level", + "example" : [ + "$id.isoforms.results" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--stat", + "description" : "RSEM statistics", + "example" : [ + "$id.stat" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--logs", + "description" : "RSEM logs", + "example" : [ + "$id.log" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_star", + "description" : "BAM file generated by STAR (optional)", + "example" : [ + "$id.STAR.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_genome", + "description" : "Genome BAM file (optional)", + "example" : [ + "$id.genome.bam" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--bam_transcript", + "description" : "Transcript BAM file (optional)", + "example" : [ + "$id.transcript.bam" + ], + "must_exist" : true, + "create_parent" : true, + 
"required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sort_bam_by_read_name", + "description" : "Sort BAM file aligned under transcript coordidate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--no_bam_output", + "description" : "Do not output any BAM file.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sampling_for_bam", + "description" : "When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \\"noise\\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \\"noise\\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--output_genome_bam", + "description" : "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. 
'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai' \nwill be generated.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sort_bam_by_coordinate", + "description" : "Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Basic Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--no_qualities", + "description" : "Input reads do not contain quality scores.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--alignments", + "description" : "Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--fai", + "description" : "If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. is a FAI format file containing each reference sequence's \nname and length. Please refer to the SAM official website for the details of FAI format.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--bowtie2", + "description" : "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1' \nby default. The last parameter of '--score_min', '-0.1', is the negative of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'. 
If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star", + "description" : "Use STAR to align reads. Alignment parameters are from ENCODE3's STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory with name as 'sample_name.bam'. Each STAR job will have its own private copy of \nthe genome in memory.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--hisat2_hca", + "description" : "Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--append_names", + "description" : "If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results' and 'sample_name.genes.results'.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--seed", + "description" : "Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--single_cell_prior", + "description" : "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, much less genes are expressed in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. 
Then RSEM will use Dirichlet(0.1) as the prior which \nencourage the sparsity of the expression levels.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--calc_pme", + "description" : "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--calc_ci", + "description" : "Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting '--ci_credibility_level'.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--quiet", + "alternatives" : [ + "-q" + ], + "description" : "Suppress the output of logging information.", + "direction" : "input" + } + ] + }, + { + "name" : "Aligner Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--seed_length", + "description" : "Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's seed length parameter. Any read with its or at least \none of its mates' (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default: 25)\n", + "example" : [ + 25 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--phred64_quals", + "description" : "Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. 
(Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--solexa_quals", + "description" : "Input quality scores are solexa encoded (from GA Pipeline ver. < 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--bowtie_n", + "description" : "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n", + "example" : [ + 2 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2, + 3 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_e", + "description" : "(Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n", + "example" : [ + 99999999 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_m", + "description" : "(Bowtie parameter) suppress all alignments for a read if > valid alignments exist. (Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie_chunkmbs", + "description" : "(Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie's default)\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--bowtie2_mismatch_rate", + "description" : "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n", + "example" : [ + 0.1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--bowtie2_k", + "description" : "(Bowtie 2 parameter) Find up to alignments per read. 
(Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--bowtie2_sensitivity_level", + "description" : "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. must be one of \\"very_fast\\", \\"fast\\", \\"sensitive\\" \nand \\"very_sensitive\\". The four candidates correspond to Bowtie 2's \\"--very-fast\\", \\"--fast\\", \n\\"--sensitive\\" and \\"--very-sensitive\\" options. (Default: \\"sensitive\\" - use Bowtie 2's default)\n", + "example" : [ + "sensitive" + ], + "required" : false, + "choices" : [ + "very_fast", + "fast", + "sensitive", + "very_sensitive" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--star_gzipped_read_file", + "description" : "Input read file(s) is compressed by gzip. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star_bzipped_read_file", + "description" : "Input read file(s) is compressed by bzip2. (Default: false)\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--star_output_genome_bam", + "description" : "Save the BAM file from STAR alignment under genomic coordinate to 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR's manual, 'paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well'. \n(Default: false)\n", + "direction" : "input" + } + ] + }, + { + "name" : "Advanced Options", + "arguments" : [ + { + "type" : "string", + "name" : "--tag", + "description" : "The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format :i:, where a bigger than 0 \nindicates a read with too many alignments. 
(Default: \\"\\")\n", + "example" : [ + "" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_min", + "description" : "Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_max", + "description" : "Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--fragment_length_mean", + "description" : "(single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--gragment_length_sd", + "description" : "(single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n", + "example" : [ + 0.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--estimate_rspd", + "description" : "Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_rspd_bins", + "description" : "Number of bins in the RSPD. 
Only relevant when '--estimate_rspd' is specified. Use of the default \nsetting is recommended. (Default: 20)\n", + "example" : [ + 20 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_burnin", + "description" : "The number of burn-in rounds for RSEM's Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. (Default: 200)\n", + "example" : [ + 200 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_number_of_samples", + "description" : "The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gibbs_sampling_gap", + "description" : "The number of rounds between two succinct count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + will also be collected. (Default: 1)\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--ci_credibility_level", + "description" : "The credibility level for credibility intervals. (Default: 0.95)\n", + "example" : [ + 0.95 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--ci_number_of_samples_per_count_vector", + "description" : "The number of read generating probability vectors sampled per sampled count vector. The crebility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. 
This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n", + "example" : [ + 50 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--keep_intermediate_files", + "description" : "Keep temporary files generated by RSEM. RSEM creates a temporary directory, 'sample_name.temp', \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--temporary_folder", + "description" : "Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n", + "example" : [ + "sample_name.temp" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--time", + "description" : "Output time consumed by each step of RSEM to 'sample_name.time'.\n", + "direction" : "input" + } + ] + }, + { + "name" : "Prior-Enhanced RSEM Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--run_pRSEM", + "description" : "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. 
in pRSEM's default \npartition model), either ChIP-seq peak file (with the '--chipseq_peak_file' option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n'--chipseq_target_read_files ', '--chipseq_control_read_files ', and '--bowtie_path \n options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--chipseq_peak_file", + "description" : "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_target_read_files", + "description" : "Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path ' \nand '--chipseq_control_read_files ' must be defined when this option is specified.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_control_read_files", + "description" : "Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. 
The file(s) can be either \nungzipped or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path ' and \n'--chipseq_target_read_files ' must be defined when this option is specified.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_read_files_multi_targets", + "description" : "Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n'.gz' or '.gzip'. When this option is specified, the option '--bowtie_path ' must be defined and \nthe option '--partition_model ' will be set to 'cmb_lgt' automatically.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--chipseq_bed_files_multi_targets", + "description" : "Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option is specified, the option '--partition_model \n' will be set to 'cmb_lgt' automatically.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--cap_stacked_chipseq_reads", + "description" : "Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. 
This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either '--chipseq_read_files_multi_targets ' or \n'--chipseq_bed_files_multi_targets ' is specified.\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--n_max_stacked_chipseq_reads", + "description" : "The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads' is set.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--partition_model", + "description" : "A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n", + "example" : [ + "pk" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Calculate expression with RSEM. 
\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "Transcriptome", + "Index", + "Alignment", + "RSEM" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "https://doi.org/10.1186/1471-2105-12-323" + ] + }, + "links" : { + "repository" : "https://github.com/deweylab/RSEM", + "homepage" : "https://deweylab.github.io/RSEM/", + "documentation" : "https://deweylab.github.io/RSEM/rsem-calculate-expression.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : 
"memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "ubuntu:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "main", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "build-essential", + "gcc", + "g++", + "make", + "wget", + "zlib1g-dev", + "unzip" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "apt-get update && \\\\\napt-get clean && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip && \\\\\nunzip 2.7.11a.zip && \\\\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR /usr/local/bin && \\\\\ncd && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip && \\\\\nunzip v1.3.3.zip && \\\\\ncd RSEM-1.3.3 && \\\\\nmake && \\\\\nmake install\n" + ], + "env" : [ + "STAR_VERSION=2.7.11b", + "RSEM_VERSION=1.3.3" + ] + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`\\" > /var/software_versions.txt && \\\\\necho \\"STAR: `STAR --version`\\" >> 
/var/software_versions.txt && \\\\\necho \\"bowtie2: `bowtie2 --version | grep -oP '\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt && \\\\\necho \\"HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \\\\K\\\\d+\\\\.\\\\d+\\\\.\\\\d+'`\\" >> /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/rsem/rsem_calculate_expression/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/rsem/rsem_calculate_expression", + "viash_version" : "0.9.0", + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" + }, + "package_config" : { + "name" : "biobox", + "version" : "main", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been 
auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) +$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi ) +$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi ) +$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi ) +$( if [ ! 
-z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_BAM_BY_READ_NAME+x} ]; then echo "${VIASH_PAR_SORT_BAM_BY_READ_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_bam_by_read_name='&'#" ; else echo "# par_sort_bam_by_read_name="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_BAM_OUTPUT+x} ]; then echo "${VIASH_PAR_NO_BAM_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_bam_output='&'#" ; else echo "# par_no_bam_output="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLING_FOR_BAM+x} ]; then echo "${VIASH_PAR_SAMPLING_FOR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sampling_for_bam='&'#" ; else echo "# par_sampling_for_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_GENOME_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_GENOME_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_genome_bam='&'#" ; else echo "# par_output_genome_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_SORT_BAM_BY_COORDINATE+x} ]; then echo "${VIASH_PAR_SORT_BAM_BY_COORDINATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sort_bam_by_coordinate='&'#" ; else echo "# par_sort_bam_by_coordinate="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_QUALITIES+x} ]; then echo "${VIASH_PAR_NO_QUALITIES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_qualities='&'#" ; else echo "# par_no_qualities="; fi ) +$( if [ ! -z ${VIASH_PAR_ALIGNMENTS+x} ]; then echo "${VIASH_PAR_ALIGNMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_alignments='&'#" ; else echo "# par_alignments="; fi ) +$( if [ ! -z ${VIASH_PAR_FAI+x} ]; then echo "${VIASH_PAR_FAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fai='&'#" ; else echo "# par_fai="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2+x} ]; then echo "${VIASH_PAR_BOWTIE2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2='&'#" ; else echo "# par_bowtie2="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR+x} ]; then echo "${VIASH_PAR_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star='&'#" ; else echo "# par_star="; fi ) +$( if [ ! 
-z ${VIASH_PAR_HISAT2_HCA+x} ]; then echo "${VIASH_PAR_HISAT2_HCA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hisat2_hca='&'#" ; else echo "# par_hisat2_hca="; fi ) +$( if [ ! -z ${VIASH_PAR_APPEND_NAMES+x} ]; then echo "${VIASH_PAR_APPEND_NAMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_append_names='&'#" ; else echo "# par_append_names="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_SINGLE_CELL_PRIOR+x} ]; then echo "${VIASH_PAR_SINGLE_CELL_PRIOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_cell_prior='&'#" ; else echo "# par_single_cell_prior="; fi ) +$( if [ ! -z ${VIASH_PAR_CALC_PME+x} ]; then echo "${VIASH_PAR_CALC_PME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_calc_pme='&'#" ; else echo "# par_calc_pme="; fi ) +$( if [ ! -z ${VIASH_PAR_CALC_CI+x} ]; then echo "${VIASH_PAR_CALC_CI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_calc_ci='&'#" ; else echo "# par_calc_ci="; fi ) +$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi ) +$( if [ ! -z ${VIASH_PAR_SEED_LENGTH+x} ]; then echo "${VIASH_PAR_SEED_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed_length='&'#" ; else echo "# par_seed_length="; fi ) +$( if [ ! -z ${VIASH_PAR_PHRED64_QUALS+x} ]; then echo "${VIASH_PAR_PHRED64_QUALS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred64_quals='&'#" ; else echo "# par_phred64_quals="; fi ) +$( if [ ! -z ${VIASH_PAR_SOLEXA_QUALS+x} ]; then echo "${VIASH_PAR_SOLEXA_QUALS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_solexa_quals='&'#" ; else echo "# par_solexa_quals="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_N+x} ]; then echo "${VIASH_PAR_BOWTIE_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_n='&'#" ; else echo "# par_bowtie_n="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_E+x} ]; then echo "${VIASH_PAR_BOWTIE_E}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_e='&'#" ; else echo "# par_bowtie_e="; fi ) +$( if [ ! 
-z ${VIASH_PAR_BOWTIE_M+x} ]; then echo "${VIASH_PAR_BOWTIE_M}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_m='&'#" ; else echo "# par_bowtie_m="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE_CHUNKMBS+x} ]; then echo "${VIASH_PAR_BOWTIE_CHUNKMBS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie_chunkmbs='&'#" ; else echo "# par_bowtie_chunkmbs="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_MISMATCH_RATE+x} ]; then echo "${VIASH_PAR_BOWTIE2_MISMATCH_RATE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_mismatch_rate='&'#" ; else echo "# par_bowtie2_mismatch_rate="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_K+x} ]; then echo "${VIASH_PAR_BOWTIE2_K}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_k='&'#" ; else echo "# par_bowtie2_k="; fi ) +$( if [ ! -z ${VIASH_PAR_BOWTIE2_SENSITIVITY_LEVEL+x} ]; then echo "${VIASH_PAR_BOWTIE2_SENSITIVITY_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bowtie2_sensitivity_level='&'#" ; else echo "# par_bowtie2_sensitivity_level="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_GZIPPED_READ_FILE+x} ]; then echo "${VIASH_PAR_STAR_GZIPPED_READ_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_gzipped_read_file='&'#" ; else echo "# par_star_gzipped_read_file="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_BZIPPED_READ_FILE+x} ]; then echo "${VIASH_PAR_STAR_BZIPPED_READ_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_bzipped_read_file='&'#" ; else echo "# par_star_bzipped_read_file="; fi ) +$( if [ ! -z ${VIASH_PAR_STAR_OUTPUT_GENOME_BAM+x} ]; then echo "${VIASH_PAR_STAR_OUTPUT_GENOME_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_star_output_genome_bam='&'#" ; else echo "# par_star_output_genome_bam="; fi ) +$( if [ ! -z ${VIASH_PAR_TAG+x} ]; then echo "${VIASH_PAR_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tag='&'#" ; else echo "# par_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_MIN+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_min='&'#" ; else echo "# par_fragment_length_min="; fi ) +$( if [ ! 
-z ${VIASH_PAR_FRAGMENT_LENGTH_MAX+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_max='&'#" ; else echo "# par_fragment_length_max="; fi ) +$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_MEAN+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_MEAN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_mean='&'#" ; else echo "# par_fragment_length_mean="; fi ) +$( if [ ! -z ${VIASH_PAR_GRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_GRAGMENT_LENGTH_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gragment_length_sd='&'#" ; else echo "# par_gragment_length_sd="; fi ) +$( if [ ! -z ${VIASH_PAR_ESTIMATE_RSPD+x} ]; then echo "${VIASH_PAR_ESTIMATE_RSPD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_estimate_rspd='&'#" ; else echo "# par_estimate_rspd="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_RSPD_BINS+x} ]; then echo "${VIASH_PAR_NUM_RSPD_BINS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_rspd_bins='&'#" ; else echo "# par_num_rspd_bins="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_BURNIN+x} ]; then echo "${VIASH_PAR_GIBBS_BURNIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_burnin='&'#" ; else echo "# par_gibbs_burnin="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_NUMBER_OF_SAMPLES+x} ]; then echo "${VIASH_PAR_GIBBS_NUMBER_OF_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_number_of_samples='&'#" ; else echo "# par_gibbs_number_of_samples="; fi ) +$( if [ ! -z ${VIASH_PAR_GIBBS_SAMPLING_GAP+x} ]; then echo "${VIASH_PAR_GIBBS_SAMPLING_GAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gibbs_sampling_gap='&'#" ; else echo "# par_gibbs_sampling_gap="; fi ) +$( if [ ! -z ${VIASH_PAR_CI_CREDIBILITY_LEVEL+x} ]; then echo "${VIASH_PAR_CI_CREDIBILITY_LEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ci_credibility_level='&'#" ; else echo "# par_ci_credibility_level="; fi ) +$( if [ ! 
-z ${VIASH_PAR_CI_NUMBER_OF_SAMPLES_PER_COUNT_VECTOR+x} ]; then echo "${VIASH_PAR_CI_NUMBER_OF_SAMPLES_PER_COUNT_VECTOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ci_number_of_samples_per_count_vector='&'#" ; else echo "# par_ci_number_of_samples_per_count_vector="; fi ) +$( if [ ! -z ${VIASH_PAR_KEEP_INTERMEDIATE_FILES+x} ]; then echo "${VIASH_PAR_KEEP_INTERMEDIATE_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_keep_intermediate_files='&'#" ; else echo "# par_keep_intermediate_files="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMPORARY_FOLDER+x} ]; then echo "${VIASH_PAR_TEMPORARY_FOLDER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temporary_folder='&'#" ; else echo "# par_temporary_folder="; fi ) +$( if [ ! -z ${VIASH_PAR_TIME+x} ]; then echo "${VIASH_PAR_TIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_time='&'#" ; else echo "# par_time="; fi ) +$( if [ ! -z ${VIASH_PAR_RUN_PRSEM+x} ]; then echo "${VIASH_PAR_RUN_PRSEM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_pRSEM='&'#" ; else echo "# par_run_pRSEM="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_PEAK_FILE+x} ]; then echo "${VIASH_PAR_CHIPSEQ_PEAK_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_peak_file='&'#" ; else echo "# par_chipseq_peak_file="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_TARGET_READ_FILES+x} ]; then echo "${VIASH_PAR_CHIPSEQ_TARGET_READ_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_target_read_files='&'#" ; else echo "# par_chipseq_target_read_files="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_CONTROL_READ_FILES+x} ]; then echo "${VIASH_PAR_CHIPSEQ_CONTROL_READ_FILES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_control_read_files='&'#" ; else echo "# par_chipseq_control_read_files="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIPSEQ_READ_FILES_MULTI_TARGETS+x} ]; then echo "${VIASH_PAR_CHIPSEQ_READ_FILES_MULTI_TARGETS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_read_files_multi_targets='&'#" ; else echo "# par_chipseq_read_files_multi_targets="; fi ) +$( if [ ! 
-z ${VIASH_PAR_CHIPSEQ_BED_FILES_MULTI_TARGETS+x} ]; then echo "${VIASH_PAR_CHIPSEQ_BED_FILES_MULTI_TARGETS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chipseq_bed_files_multi_targets='&'#" ; else echo "# par_chipseq_bed_files_multi_targets="; fi ) +$( if [ ! -z ${VIASH_PAR_CAP_STACKED_CHIPSEQ_READS+x} ]; then echo "${VIASH_PAR_CAP_STACKED_CHIPSEQ_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cap_stacked_chipseq_reads='&'#" ; else echo "# par_cap_stacked_chipseq_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_N_MAX_STACKED_CHIPSEQ_READS+x} ]; then echo "${VIASH_PAR_N_MAX_STACKED_CHIPSEQ_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_n_max_stacked_chipseq_reads='&'#" ; else echo "# par_n_max_stacked_chipseq_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_PARTITION_MODEL+x} ]; then echo "${VIASH_PAR_PARTITION_MODEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_partition_model='&'#" ; else echo "# par_partition_model="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +if [ "\\$par_strandedness" == 'forward' ]; then + strandedness='--strandedness forward' +elif [ "\\$par_strandedness" == 'reverse' ]; then + strandedness="--strandedness reverse" +else + strandedness='' +fi + +IFS=";" read -ra input <<< \\$par_input + +INDEX=\\$(find -L \\$par_index -name "*.grp" | sed 's/\\\\.grp\\$//') + +unset_if_false=( par_paired par_quiet par_no_bam_output par_sampling_for_bam par_no_qualities + par_alignments par_bowtie2 par_star par_hisat2_hca par_append_names + par_single_cell_prior par_calc_pme par_calc_ci par_phred64_quals + par_solexa_quals par_star_gzipped_read_file par_star_bzipped_read_file + par_star_output_genome_bam par_estimate_rspd par_keep_intermediate_files + par_time par_run_pRSEM par_cap_stacked_chipseq_reads par_sort_bam_by_read_name par_sort_bam_by_coordinate ) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +rsem-calculate-expression \\\\ + \\${par_quiet:+-q} \\\\ + \\${par_no_bam_output:+--no-bam-output} \\\\ + \\${par_sampling_for_bam:+--sampling-for-bam} \\\\ + \\${par_no_qualities:+--no-qualities} \\\\ + \\${par_alignments:+--alignments} \\\\ + \\${par_bowtie2:+--bowtie2} \\\\ + \\${par_star:+--star} \\\\ + \\${par_hisat2_hca:+--hisat2-hca} \\\\ + \\${par_append_names:+--append-names} \\\\ + \\${par_single_cell_prior:+--single-cell-prior} \\\\ + \\${par_calc_pme:+--calc-pme} \\\\ + \\${par_calc_ci:+--calc-ci} \\\\ + \\${par_phred64_quals:+--phred64-quals} \\\\ + \\${par_solexa_quals:+--solexa-quals} \\\\ + \\${par_star_gzipped_read_file:+--star-gzipped-read-file} \\\\ + \\${par_star_bzipped_read_file:+--star-bzipped-read-file} \\\\ + \\${par_star_output_genome_bam:+--star-output-genome-bam} \\\\ + \\${par_estimate_rspd:+--estimate-rspd} \\\\ + 
\\${par_keep_intermediate_files:+--keep-intermediate-files} \\\\ + \\${par_time:+--time} \\\\ + \\${par_run_pRSEM:+--run-pRSEM} \\\\ + \\${par_cap_stacked_chipseq_reads:+--cap-stacked-chipseq-reads} \\\\ + \\${par_sort_bam_by_read_name:+--sort-bam-by-read-name} \\\\ + \\${par_sort_bam_by_coordinate:+--sort-bam-by-coordinate} \\\\ + \\${par_fai:+--fai "\\$par_fai"} \\\\ + \\${par_seed:+--seed "\\$par_seed"} \\\\ + \\${par_seed_length:+--seed-length "\\$par_seed_length"} \\\\ + \\${par_bowtie_n:+--bowtie-n "\\$par_bowtie_n"} \\\\ + \\${par_bowtie_e:+--bowtie-e "\\$par_bowtie_e"} \\\\ + \\${par_bowtie_m:+--bowtie-m "\\$par_bowtie_m"} \\\\ + \\${par_bowtie_chunkmbs:+--bowtie-chunkmbs "\\$par_bowtie_chunkmbs"} \\\\ + \\${par_bowtie2_mismatch_rate:+--bowtie2-mismatch-rate "\\$par_bowtie2_mismatch_rate"} \\\\ + \\${par_bowtie2_k:+--bowtie2-k "\\$par_bowtie2_k"} \\\\ + \\${par_bowtie2_sensitivity_level:+--bowtie2-sensitivity-level "\\$par_bowtie2_sensitivity_level"} \\\\ + \\${par_tag:+--tag "\\$par_tag"} \\\\ + \\${par_fragment_length_min:+--fragment-length-min "\\$par_fragment_length_min"} \\\\ + \\${par_fragment_length_max:+--fragment-length-max "\\$par_fragment_length_max"} \\\\ + \\${par_fragment_length_mean:+--fragment-length-mean "\\$par_fragment_length_mean"} \\\\ + \\${par_fragment_length_sd:+--fragment-length-sd "\\$par_fragment_length_sd"} \\\\ + \\${par_num_rspd_bins:+--num-rspd-bins "\\$par_num_rspd_bins"} \\\\ + \\${par_gibbs_burnin:+--gibbs-burnin "\\$par_gibbs_burnin"} \\\\ + \\${par_gibbs_number_of_samples:+--gibbs-number-of-samples "\\$par_gibbs_number_of_samples"} \\\\ + \\${par_gibbs_sampling_gap:+--gibbs-sampling-gap "\\$par_gibbs_sampling_gap"} \\\\ + \\${par_ci_credibility_level:+--ci-credibility-level "\\$par_ci_credibility_level"} \\\\ + \\${par_ci_number_of_samples_per_count_vector:+--ci-number-of-samples-per-count-vector "\\$par_ci_number_of_samples_per_count_vector"} \\\\ + \\${par_temporary_folder:+--temporary-folder "\\$par_temporary_folder"} 
\\\\ + \\${par_chipseq_peak_file:+--chipseq-peak-file "\\$par_chipseq_peak_file"} \\\\ + \\${par_chipseq_target_read_files:+--chipseq-target-read-files "\\$par_chipseq_target_read_files"} \\\\ + \\${par_chipseq_control_read_files:+--chipseq-control-read-files "\\$par_chipseq_control_read_files"} \\\\ + \\${par_chipseq_read_files_multi_targets:+--chipseq-read-files-multi-targets "\\$par_chipseq_read_files_multi_targets"} \\\\ + \\${par_chipseq_bed_files_multi_targets:+--chipseq-bed-files-multi-targets "\\$par_chipseq_bed_files_multi_targets"} \\\\ + \\${par_n_max_stacked_chipseq_reads:+--n-max-stacked-chipseq-reads "\\$par_n_max_stacked_chipseq_reads"} \\\\ + \\${par_partition_model:+--partition-model "\\$par_partition_model"} \\\\ + \\$strandedness \\\\ + \\${par_paired:+--paired-end} \\\\ + \\${input[*]} \\\\ + \\$INDEX \\\\ + \\$par_id + +[[ -f "\\${par_id}.genes.results" ]] && mv "\\${par_id}.genes.results" \\$par_counts_gene +[[ -f "\\${par_id}.isoforms.results" ]] && mv "\\${par_id}.isoforms.results" \\$par_counts_transcripts +[[ -d "\\${par_id}.stat" ]] && mv "\\${par_id}.stat" \\$par_stat +[[ -f "\\${par_id}.log" ]] && mv "\\${par_id}.log" \\$par_logs +[[ -f "\\${par_id}.STAR.genome.bam" ]] && mv "\\${par_id}.STAR.genome.bam" \\$par_bam_star +[[ -f "\\${par_id}.genome.bam" ]] && mv "\\${par_id}.genome.bam" \\$par_bam_genome +[[ -f "\\${par_id}.transcript.bam" ]] && mv "\\${par_id}.transcript.bam" \\$par_bam_transcript +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. 
+ */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + 
.collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." 
+ } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? 
+ if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new 
nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/rsem/rsem_calculate_expression", + "tag" : "main" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/rsem/rsem_calculate_expression/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow.config similarity index 98% rename from target/nextflow/rsem/rsem_calculate_expression/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow.config index 6c00e43..9149cfe 100644 --- a/target/nextflow/rsem/rsem_calculate_expression/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow.config @@ -3,7 
+3,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Calculate expression with RSEM.\n' + description = 'Calculate expression with RSEM. \n' } process.container = 'nextflow/bash:latest' diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json new file mode 100644 index 0000000..c0aedae --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json @@ -0,0 +1,839 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "rsem_calculate_expression", +"description": "Calculate expression with RSEM. \n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "string", + "description": "Type: `string`. Sample ID", + "help_text": "Type: `string`. Sample ID." + + } + + + , + "strandedness": { + "type": + "string", + "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity", + "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, reverse", + "enum": ["forward", "reverse", "unstranded"] + + + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Paired-end reads or not?", + "help_text": "Type: `boolean_true`, default: `false`. Paired-end reads or not?" + , + "default": "False" + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification." + + } + + + , + "index": { + "type": + "string", + "description": "Type: `file`. 
RSEM index", + "help_text": "Type: `file`. RSEM index." + + } + + + , + "extra_args": { + "type": + "string", + "description": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples", + "help_text": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "counts_gene": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level", + "help_text": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level" + , + "default": "$id.$key.counts_gene.results" + } + + + , + "counts_transcripts": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level", + "help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level" + , + "default": "$id.$key.counts_transcripts.results" + } + + + , + "stat": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics", + "help_text": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics" + , + "default": "$id.$key.stat.stat" + } + + + , + "logs": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs", + "help_text": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs" + , + "default": "$id.$key.logs.log" + } + + + , + "bam_star": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. 
BAM file generated by STAR (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)" + , + "default": "$id.$key.bam_star.bam" + } + + + , + "bam_genome": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)" + , + "default": "$id.$key.bam_genome.bam" + } + + + , + "bam_transcript": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)", + "help_text": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)" + , + "default": "$id.$key.bam_transcript.bam" + } + + + , + "sort_bam_by_read_name": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordidate by read name", + "help_text": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordidate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n" + , + "default": "False" + } + + + , + "no_bam_output": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Do not output any BAM file", + "help_text": "Type: `boolean_true`, default: `false`. Do not output any BAM file." + , + "default": "False" + } + + + , + "sampling_for_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities", + "help_text": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \"noise\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n" + , + "default": "False" + } + + + , + "output_genome_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name.genome.bam\u0027, with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. \u0027sample_name.genome.sorted.bam\u0027 and \u0027sample_name.genome.sorted.bam.bai\u0027 \nwill be generated.\n" + , + "default": "False" + } + + + , + "sort_bam_by_coordinate": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices", + "help_text": "Type: `boolean_true`, default: `false`. 
Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n" + , + "default": "False" + } + + +} +}, + + + "basic options" : { + "title": "Basic Options", + "type": "object", + "description": "No description", + "properties": { + + + "no_qualities": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores", + "help_text": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores." + , + "default": "False" + } + + + , + "alignments": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format", + "help_text": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n" + , + "default": "False" + } + + + , + "fai": { + "type": + "string", + "description": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on", + "help_text": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. \u003cfile\u003e is a FAI format file containing each reference sequence\u0027s \nname and length. Please refer to the SAM official website for the details of FAI format.\n" + + } + + + , + "bowtie2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads", + "help_text": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options \u0027--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1\u0027 \nby default. 
The last parameter of \u0027--score_min\u0027, \u0027-0.1\u0027, is the negative of maximum mismatch rate. \nThis rate can be set by option \u0027--bowtie2_mismatch_rate\u0027. If reads are paired-end, we additionally \nuse options \u0027--no_mixed\u0027 and \u0027--no_discordant\u0027.\n" + , + "default": "False" + } + + + , + "star": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use STAR to align reads", + "help_text": "Type: `boolean_true`, default: `false`. Use STAR to align reads. Alignment parameters are from ENCODE3\u0027s STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR\u0027s Output BAM file is unsorted. It is stored in RSEM\u0027s \ntemporary directory with name as \u0027sample_name.bam\u0027. Each STAR job will have its own private copy of \nthe genome in memory.\n" + , + "default": "False" + } + + + , + "hisat2_hca": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast", + "help_text": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlast.\n" + , + "default": "False" + } + + + , + "append_names": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name.isoforms.results\u0027 and \u0027sample_name.genes.results\u0027.\n" + , + "default": "False" + } + + + , + "seed": { + "type": + "integer", + "description": "Type: `integer`. 
Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals", + "help_text": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n" + + } + + + , + "single_cell_prior": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals", + "help_text": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, much less genes are expressed in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which \nencourage the sparsity of the expression levels.\n" + , + "default": "False" + } + + + , + "calc_pme": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates", + "help_text": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates." + , + "default": "False" + } + + + , + "calc_ci": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates", + "help_text": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting \u0027--ci_credibility_level\u0027.\n" + , + "default": "False" + } + + + , + "quiet": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Suppress the output of logging information", + "help_text": "Type: `boolean_true`, default: `false`. Suppress the output of logging information." + , + "default": "False" + } + + +} +}, + + + "aligner options" : { + "title": "Aligner Options", + "type": "object", + "description": "No description", + "properties": { + + + "seed_length": { + "type": + "integer", + "description": "Type: `integer`, example: `25`. Seed length used by the read aligner", + "help_text": "Type: `integer`, example: `25`. Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie\u0027s seed length parameter. Any read with its or at least \none of its mates\u0027 (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value \u003e= 5 and give a warning \nmessage if the value \u003c 25 but \u003e= 5. (Default: 25)\n" + + } + + + , + "phred64_quals": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver", + "help_text": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. \u003e= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n" + , + "default": "False" + } + + + , + "solexa_quals": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver", + "help_text": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver. \u003c 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. 
(Default: false)\n" + , + "default": "False" + } + + + , + "bowtie_n": { + "type": + "integer", + "description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed", + "help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n", + "enum": [0, 1, 2, 3] + + + } + + + , + "bowtie_e": { + "type": + "integer", + "description": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment", + "help_text": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n" + + } + + + , + "bowtie_m": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist", + "help_text": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist. (Default: 200)\n" + + } + + + , + "bowtie_chunkmbs": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n", + "help_text": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n" + + } + + + , + "bowtie2_mismatch_rate": { + "type": + "number", + "description": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed", + "help_text": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n" + + } + + + , + "bowtie2_k": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. 
(Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read", + "help_text": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read. (Default: 200)\n" + + } + + + , + "bowtie2_sensitivity_level": { + "type": + "string", + "description": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode", + "help_text": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. \u003cstring\u003e must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\". The four candidates correspond to Bowtie 2\u0027s \"--very-fast\", \"--fast\", \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\" - use Bowtie 2\u0027s default)\n", + "enum": ["very_fast", "fast", "sensitive", "very_sensitive"] + + + } + + + , + "star_gzipped_read_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip", + "help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip. (Default: false)\n" + , + "default": "False" + } + + + , + "star_bzipped_read_file": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2", + "help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2. (Default: false)\n" + , + "default": "False" + } + + + , + "star_output_genome_bam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. 
Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name.STAR.genome.bam\u0027. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR\u0027s manual, \u0027paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well\u0027. \n(Default: false)\n" + , + "default": "False" + } + + +} +}, + + + "advanced options" : { + "title": "Advanced Options", + "type": "object", + "description": "No description", + "properties": { + + + "tag": { + "type": + "string", + "description": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments", + "help_text": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format \u003ctagName\u003e:i:\u003cvalue\u003e, where a \u003cvalue\u003e bigger than 0 \nindicates a read with too many alignments. (Default: \"\")\n" + + } + + + , + "fragment_length_min": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Minimum read/insert length allowed", + "help_text": "Type: `integer`, example: `1`. Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n" + + } + + + , + "fragment_length_max": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Maximum read/insert length allowed", + "help_text": "Type: `integer`, example: `1000`. Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n" + + } + + + , + "fragment_length_mean": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian", + "help_text": "Type: `integer`, example: `-1`. 
(single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n" + + } + + + , + "gragment_length_sd": { + "type": + "number", + "description": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian", + "help_text": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n" + + } + + + , + "estimate_rspd": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data", + "help_text": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n" + , + "default": "False" + } + + + , + "num_rspd_bins": { + "type": + "integer", + "description": "Type: `integer`, example: `20`. Number of bins in the RSPD", + "help_text": "Type: `integer`, example: `20`. Number of bins in the RSPD. Only relevant when \u0027--estimate_rspd\u0027 is specified. Use of the default \nsetting is recommended. (Default: 20)\n" + + } + + + , + "gibbs_burnin": { + "type": + "integer", + "description": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler", + "help_text": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. 
(Default: 200)\n" + + } + + + , + "gibbs_number_of_samples": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers", + "help_text": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n" + + } + + + , + "gibbs_sampling_gap": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects", + "help_text": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + \u003cint\u003e will also be collected. (Default: 1)\n" + + } + + + , + "ci_credibility_level": { + "type": + "number", + "description": "Type: `double`, example: `0.95`. The credibility level for credibility intervals", + "help_text": "Type: `double`, example: `0.95`. The credibility level for credibility intervals. (Default: 0.95)\n" + + } + + + , + "ci_number_of_samples_per_count_vector": { + "type": + "integer", + "description": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector", + "help_text": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector. The crebility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n" + + } + + + , + "keep_intermediate_files": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM", + "help_text": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM. 
RSEM creates a temporary directory, \u0027sample_name.temp\u0027, \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n" + , + "default": "False" + } + + + , + "temporary_folder": { + "type": + "string", + "description": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM", + "help_text": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n" + + } + + + , + "time": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name", + "help_text": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027.\n" + , + "default": "False" + } + + +} +}, + + + "prior-enhanced rsem options" : { + "title": "Prior-Enhanced RSEM Options", + "type": "object", + "description": "No description", + "properties": { + + + "run_pRSEM": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM)", + "help_text": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform\u0027s initial pseudo-count for \nRSEM\u0027s Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. 
in pRSEM\u0027s default \npartition model), either ChIP-seq peak file (with the \u0027--chipseq_peak_file\u0027 option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027, \u0027--chipseq_control_read_files \u003cstring\u003e\u0027, and \u0027--bowtie_path \n\u003cpath\u003e options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n" + , + "default": "False" + } + + + , + "chipseq_peak_file": { + "type": + "string", + "description": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i", + "help_text": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n" + + } + + + , + "chipseq_target_read_files": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 \nand \u0027--chipseq_control_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n" + + } + + + , + "chipseq_control_read_files": { + "type": + "string", + "description": "Type: `file`. 
Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq conrol. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 and \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n" + + } + + + , + "chipseq_read_files_multi_targets": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets", + "help_text": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n\u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--bowtie_path \u003cpath\u003e\u0027 must be defined and \nthe option \u0027--partition_model \u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n" + + } + + + , + "chipseq_bed_files_multi_targets": { + "type": + "string", + "description": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets", + "help_text": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. 
When this option is specified, the option \u0027--partition_model \n\u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n" + + } + + + , + "cap_stacked_chipseq_reads": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval", + "help_text": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either \u0027--chipseq_read_files_multi_targets \u003cstring\u003e\u0027 or \n\u0027--chipseq_bed_files_multi_targets \u003cstring\u003e\u0027 is specified.\n" + , + "default": "False" + } + + + , + "n_max_stacked_chipseq_reads": { + "type": + "integer", + "description": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep", + "help_text": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption \u0027--cap_stacked_chipseq_reads\u0027 is set.\n" + + } + + + , + "partition_model": { + "type": + "string", + "description": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM", + "help_text": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM\u0027s paper. 
(Default: \u0027pk\u0027)\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/basic options" + }, + + { + "$ref": "#/definitions/aligner options" + }, + + { + "$ref": "#/definitions/advanced options" + }, + + { + "$ref": "#/definitions/prior-enhanced rsem options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml similarity index 69% rename from target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml index e1f297b..1bd8d27 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/.config.vsh.yaml @@ -1,12 +1,28 @@ name: "rseqc_bamstat" namespace: "rseqc" version: "main" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" argument_groups: - name: "Input" arguments: - type: "file" - name: "--input" - description: "input alignment file in BAM or SAM format" + name: "--input_file" + alternatives: + - "-i" + description: "Input alignment file in BAM or SAM format." 
info: null must_exist: true create_parent: true @@ -15,14 +31,15 @@ argument_groups: multiple: false multiple_sep: ";" - type: "integer" - name: "--map_qual" + name: "--mapq" + alternatives: + - "-q" description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ - \ reads, default=30." + \ reads. Default: '30'.\n" info: null - default: + example: - 30 required: false - min: 0 direction: "input" multiple: false multiple_sep: ";" @@ -30,10 +47,8 @@ argument_groups: arguments: - type: "file" name: "--output" - description: "output file (txt) with mapping quality statistics" + description: "Output file (txt) with mapping quality statistics." info: null - default: - - "$id.mapping_quality.txt" must_exist: true create_parent: true required: false @@ -44,32 +59,30 @@ resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Generate statistics from a bam file.\n" +description: "Generate statistics from a bam file." test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "test.paired_end.sorted.bam" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/bamstat/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "rnaseq" +- "genomics" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#bam-stat-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" runners: - type: "executable" id: "executable" @@ -138,20 +151,19 @@ runners: engines: - type: "docker" id: 
"docker" - image: "ubuntu:22.04" + image: "python:3.10" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "python3-pip" - interactive: false - type: "python" user: false packages: - "RSeQC" upgrade: true + - type: "docker" + run: + - "echo \"RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)\" > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" @@ -163,31 +175,28 @@ build_info: output: "target/nextflow/rseqc/rseqc_bamstat" executable: "target/nextflow/rseqc/rseqc_bamstat/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git 
a/target/nextflow/rseqc/rseqc_bamstat/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf similarity index 97% rename from target/nextflow/rseqc/rseqc_bamstat/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf index 82d5327..e64063b 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2807,14 +2810,40 @@ meta = [ "name" : "rseqc_bamstat", "namespace" : "rseqc", "version" : "main", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], "argument_groups" : [ { "name" : "Input", "arguments" : [ { "type" : "file", - "name" : "--input", - "description" : "input alignment file in BAM or SAM format", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], + "description" : "Input alignment file in BAM or SAM format.", "must_exist" : true, "create_parent" : true, "required" : true, @@ -2824,13 +2853,15 @@ meta = [ }, { "type" : "integer", - "name" : "--map_qual", - "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", - "default" : [ + "name" : "--mapq", + "alternatives" : [ + "-q" + ], + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads. 
Default: '30'.\n", + "example" : [ 30 ], "required" : false, - "min" : 0, "direction" : "input", "multiple" : false, "multiple_sep" : ";" @@ -2843,10 +2874,7 @@ meta = [ { "type" : "file", "name" : "--output", - "description" : "output file (txt) with mapping quality statistics", - "default" : [ - "$id.mapping_quality.txt" - ], + "description" : "Output file (txt) with mapping quality statistics.", "must_exist" : true, "create_parent" : true, "required" : false, @@ -2864,7 +2892,7 @@ meta = [ "is_executable" : true } ], - "description" : "Generate statistics from a bam file.\n", + "description" : "Generate statistics from a bam file.", "test_resources" : [ { "type" : "bash_script", @@ -2873,38 +2901,31 @@ meta = [ }, { "type" : "file", - "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/rseqc/bamstat/main.nf" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "rnaseq", + "genomics" ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#bam-stat-py", + "issue_tracker" : "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, "runners" : [ { "type" : "executable", @@ -2983,18 +3004,11 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "python:3.10", "target_registry" : "images.viash-hub.com", "target_tag" : 
"main", "namespace_separator" : "/", "setup" : [ - { - "type" : "apt", - "packages" : [ - "python3-pip" - ], - "interactive" : false - }, { "type" : "python", "user" : false, @@ -3002,6 +3016,12 @@ meta = [ "RSeQC" ], "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)\\" > /var/software_versions.txt\n" + ] } ] }, @@ -3014,46 +3034,36 @@ meta = [ "config" : "/workdir/root/repo/src/rseqc/rseqc_bamstat/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_bamstat", + "output" : "target/nextflow/rseqc/rseqc_bamstat", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 
'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3069,8 +3079,8 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) @@ -3094,11 +3104,12 @@ $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" ## VIASH END #!/bin/bash + set -eo pipefail bam_stat.py \\\\ - --input \\$par_input \\\\ - --mapq \\$par_map_qual \\\\ + --input-file "\\${par_input_file}" \\\\ + \\${par_mapq:+--mapq "\\${par_mapq}"} \\\\ > \\$par_output VIASHMAIN bash "$tempscript" @@ -3460,7 +3471,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/rseqc/rseqc_bamstat", + "image" : "vsh/biobox/rseqc/rseqc_bamstat", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/rseqc/rseqc_bamstat/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow.config similarity index 97% rename from target/nextflow/rseqc/rseqc_bamstat/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow.config index 38ad16d..b9cffed 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow.config @@ -3,7 +3,8 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Generate statistics from a bam file.\n' + description = 'Generate statistics from a bam file.' 
+ author = 'Emma Rousseau' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json similarity index 84% rename from target/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json index 10fcdfd..21b1df1 100644 --- a/target/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", "title": "rseqc_bamstat", -"description": "Generate statistics from a bam file.\n", +"description": "Generate statistics from a bam file.", "type": "object", "definitions": { @@ -14,23 +14,22 @@ "properties": { - "input": { + "input_file": { "type": "string", - "description": "Type: `file`, required. input alignment file in BAM or SAM format", - "help_text": "Type: `file`, required. input alignment file in BAM or SAM format" + "description": "Type: `file`, required. Input alignment file in BAM or SAM format", + "help_text": "Type: `file`, required. Input alignment file in BAM or SAM format." } , - "map_qual": { + "mapq": { "type": "integer", - "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", - "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." - , - "default":30 + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads. 
Default: \u002730\u0027.\n" + } @@ -48,10 +47,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) with mapping quality statistics", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) with mapping quality statistics" + "description": "Type: `file`, default: `$id.$key.output.output`. Output file (txt) with mapping quality statistics", + "help_text": "Type: `file`, default: `$id.$key.output.output`. Output file (txt) with mapping quality statistics." , - "default":"$id.$key.output.txt" + "default": "$id.$key.output.output" } diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml similarity index 71% rename from target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml index 438dde4..73ab053 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/.config.vsh.yaml @@ -1,11 +1,27 @@ name: "rseqc_inferexperiment" namespace: "rseqc" version: "main" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" argument_groups: - name: "Input" arguments: - type: "file" - name: "--input" + name: "--input_file" + alternatives: + - "-i" description: "input alignment file in BAM or SAM format" info: null must_exist: true @@ -16,6 +32,8 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--refgene" + alternatives: + - "-r" description: "Reference gene model in bed format" info: null must_exist: true @@ -24,43 +42,47 @@ argument_groups: 
direction: "input" multiple: false multiple_sep: ";" - - type: "integer" - name: "--sample_size" - description: "Numer of reads sampled from SAM/BAM file, default = 200000." - info: null - default: - - 200000 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--map_qual" - description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ - \ reads, default=30." - info: null - default: - - 30 - required: false - min: 0 - direction: "input" - multiple: false - multiple_sep: ";" - name: "Output" arguments: - type: "file" name: "--output" - description: "output file (txt) of strandness report" + description: "Output file (txt) of strandness report." info: null - default: + example: - "$id.strandedness.txt" must_exist: true create_parent: true - required: false + required: true direction: "output" multiple: false multiple_sep: ";" +- name: "Options" + arguments: + - type: "integer" + name: "--sample_size" + alternatives: + - "-s" + description: "Number of reads sampled from SAM/BAM file. Default: 200000\n" + info: null + example: + - 200000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mapq" + alternatives: + - "-q" + description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ + \ reads. 
Default: 30\n" + info: null + example: + - 30 + required: false + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" @@ -71,28 +93,21 @@ test_resources: path: "test.sh" is_executable: true - type: "file" - path: "test.paired_end.sorted.bam" -- type: "file" - path: "test.bed12" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/inferexperiment/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#infer-experiment-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" runners: - type: "executable" id: "executable" @@ -161,20 +176,20 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "python:3.10" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - - type: "apt" - packages: - - "python3-pip" - interactive: false - type: "python" user: false packages: - "RSeQC" upgrade: true + - type: "docker" + run: + - "echo \"RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d'\ + \ ' -f2)\" > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" @@ -186,31 +201,28 @@ build_info: output: "target/nextflow/rseqc/rseqc_inferexperiment" executable: "target/nextflow/rseqc/rseqc_inferexperiment/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf similarity index 97% rename from target/nextflow/rseqc/rseqc_inferexperiment/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf index c40d0a5..1a007ad 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. 
+// +// Component authors: +// * Emma Rousseau (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2807,13 +2810,39 @@ meta = [ "name" : "rseqc_inferexperiment", "namespace" : "rseqc", "version" : "main", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], "argument_groups" : [ { "name" : "Input", "arguments" : [ { "type" : "file", - "name" : "--input", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], "description" : "input alignment file in BAM or SAM format", "must_exist" : true, "create_parent" : true, @@ -2825,6 +2854,9 @@ meta = [ { "type" : "file", "name" : "--refgene", + "alternatives" : [ + "-r" + ], "description" : "Reference gene model in bed format", "must_exist" : true, "create_parent" : true, @@ -2832,32 +2864,6 @@ meta = [ "direction" : "input", "multiple" : false, "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--sample_size", - "description" : "Numer of reads sampled from SAM/BAM file, default = 200000.", - "default" : [ - 200000 - ], - "required" : false, - "min" : 1, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--map_qual", - "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", - "default" : [ - 30 - ], - "required" : false, - "min" : 0, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" } ] }, @@ -2867,18 +2873,53 @@ meta = [ { "type" : "file", "name" : "--output", - "description" : "output file (txt) of strandness report", - "default" : [ + "description" : "Output file (txt) of strandness report.", + "example" : [ 
"$id.strandedness.txt" ], "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" } ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--sample_size", + "alternatives" : [ + "-s" + ], + "description" : "Number of reads sampled from SAM/BAM file. Default: 200000\n", + "example" : [ + 200000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mapq", + "alternatives" : [ + "-q" + ], + "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: 30\n", + "example" : [ + 30 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] } ], "resources" : [ @@ -2897,42 +2938,27 @@ meta = [ }, { "type" : "file", - "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/sarscov2/test.bed12" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/rseqc/inferexperiment/main.nf" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#infer-experiment-py", + "issue_tracker" : 
"https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, "runners" : [ { "type" : "executable", @@ -3011,18 +3037,11 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "python:3.10", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", "setup" : [ - { - "type" : "apt", - "packages" : [ - "python3-pip" - ], - "interactive" : false - }, { "type" : "python", "user" : false, @@ -3030,6 +3049,12 @@ meta = [ "RSeQC" ], "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC - infer_experiment.py: $(infer_experiment.py --version | cut -d' ' -f2)\\" > /var/software_versions.txt\n" + ] } ] }, @@ -3042,46 +3067,36 @@ meta = [ "config" : "/workdir/root/repo/src/rseqc/rseqc_inferexperiment/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_inferexperiment", + "output" : "target/nextflow/rseqc/rseqc_inferexperiment", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - 
"target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3097,11 +3112,11 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) $( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) -$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) +$( if [ ! 
-z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3127,10 +3142,10 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -eo pipefail infer_experiment.py \\\\ - -i \\$par_input \\\\ + -i \\$par_input_file \\\\ -r \\$par_refgene \\\\ - -s \\$par_sample_size \\\\ - -q \\$par_map_qual \\\\ + \\${par_sample_size:+-s "\\${par_sample_size}"} \\\\ + \\${par_mapq:+-q "\\${par_mapq}"} \\\\ > \\$par_output VIASHMAIN bash "$tempscript" @@ -3492,7 +3507,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/rseqc/rseqc_inferexperiment", + "image" : "vsh/biobox/rseqc/rseqc_inferexperiment", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow.config similarity index 99% rename from target/nextflow/rseqc/rseqc_inferexperiment/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow.config index 77f7833..d10d30e 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow.config @@ -4,6 +4,7 @@ manifest { nextflowVersion = '!>=20.12.1-edge' version = 'main' description = 'Infer 
strandedness from sequencing reads\n' + author = 'Emma Rousseau' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json similarity index 79% rename from target/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json index 2334e9c..5ac5937 100644 --- a/target/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/nextflow_schema.json @@ -14,7 +14,7 @@ "properties": { - "input": { + "input_file": { "type": "string", "description": "Type: `file`, required. input alignment file in BAM or SAM format", @@ -33,28 +33,6 @@ } - , - "sample_size": { - "type": - "integer", - "description": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file, default = 200000", - "help_text": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file, default = 200000." - , - "default":200000 - } - - - , - "map_qual": { - "type": - "integer", - "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", - "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." - , - "default":30 - } - - } }, @@ -69,10 +47,40 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) of strandness report", - "help_text": "Type: `file`, default: `$id.$key.output.txt`. output file (txt) of strandness report" + "description": "Type: `file`, required, default: `$id.$key.output.txt`, example: `$id.strandedness.txt`. 
Output file (txt) of strandness report", + "help_text": "Type: `file`, required, default: `$id.$key.output.txt`, example: `$id.strandedness.txt`. Output file (txt) of strandness report." , - "default":"$id.$key.output.txt" + "default": "$id.$key.output.txt" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "sample_size": { + "type": + "integer", + "description": "Type: `integer`, example: `200000`. Number of reads sampled from SAM/BAM file", + "help_text": "Type: `integer`, example: `200000`. Number of reads sampled from SAM/BAM file. Default: 200000\n" + + } + + + , + "mapq": { + "type": + "integer", + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads. Default: 30\n" + } @@ -120,6 +128,10 @@ "$ref": "#/definitions/output" }, + { + "$ref": "#/definitions/options" + }, + { "$ref": "#/definitions/nextflow input-output arguments" } diff --git a/target/nextflow/rseqc/rseqc_innerdistance/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml similarity index 72% rename from target/nextflow/rseqc/rseqc_innerdistance/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml index 8a7a04d..ccf4c4a 100644 --- a/target/nextflow/rseqc/rseqc_innerdistance/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/.config.vsh.yaml @@ -1,11 +1,27 @@ -name: "rseqc_innerdistance" +name: "rseqc_inner_distance" namespace: "rseqc" version: "main" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: 
"Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" argument_groups: - name: "Input" arguments: - type: "file" - name: "--input" + name: "--input_file" + alternatives: + - "-i" description: "input alignment file in BAM or SAM format" info: null must_exist: true @@ -16,6 +32,8 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--refgene" + alternatives: + - "-r" description: "Reference gene model in bed format" info: null must_exist: true @@ -26,55 +44,63 @@ argument_groups: multiple_sep: ";" - type: "integer" name: "--sample_size" - description: "Numer of reads sampled from SAM/BAM file, default = 200000." + alternatives: + - "-k" + description: "Numer of reads sampled from SAM/BAM file, default = 1000000." info: null - default: - - 200000 + example: + - 1000000 required: false - min: 1 direction: "input" multiple: false multiple_sep: ";" - type: "integer" - name: "--map_qual" + name: "--mapq" + alternatives: + - "-q" description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ \ reads, default=30." info: null - default: + example: - 30 required: false - min: 0 direction: "input" multiple: false multiple_sep: ";" - type: "integer" - name: "--lower_bound_size" + name: "--lower_bound" + alternatives: + - "-l" description: "Lower bound of inner distance (bp). This option is used for ploting\ \ histograme, default=-250." info: null - default: + example: - -250 required: false direction: "input" multiple: false multiple_sep: ";" - type: "integer" - name: "--upper_bound_size" + name: "--upper_bound" + alternatives: + - "-u" description: "Upper bound of inner distance (bp). This option is used for ploting\ \ histograme, default=250." info: null - default: + example: - 250 required: false direction: "input" multiple: false multiple_sep: ";" - type: "integer" - name: "--step_size" + name: "--step" + alternatives: + - "-s" description: "Step size (bp) of histograme. 
This option is used for plotting histogram,\ \ default=5." info: null - default: + example: - 5 required: false direction: "input" @@ -82,14 +108,22 @@ argument_groups: multiple_sep: ";" - name: "Output" arguments: + - type: "string" + name: "--output_prefix" + alternatives: + - "-o" + description: "Rrefix of output files." + info: null + required: true + direction: "input" + multiple: false + multiple_sep: ";" - type: "file" name: "--output_stats" description: "output file (txt) with summary statistics of inner distances of\ \ paired reads" info: null - default: - - "$id.inner_distance.stats" - must_exist: false + must_exist: true create_parent: true required: false direction: "output" @@ -99,9 +133,7 @@ argument_groups: name: "--output_dist" description: "output file (txt) with inner distances of all paired reads" info: null - default: - - "$id.inner_distance.txt" - must_exist: false + must_exist: true create_parent: true required: false direction: "output" @@ -112,9 +144,7 @@ argument_groups: description: "output file (txt) with frequencies of inner distances of all paired\ \ reads" info: null - default: - - "$id.inner_distance_freq.txt" - must_exist: false + must_exist: true create_parent: true required: false direction: "output" @@ -125,9 +155,7 @@ argument_groups: description: "output file (pdf) with histogram plot of of inner distances of all\ \ paired reads" info: null - default: - - "$id.inner_distance_plot.pdf" - must_exist: false + must_exist: true create_parent: true required: false direction: "output" @@ -138,9 +166,7 @@ argument_groups: description: "output file (R) with script of histogram plot of of inner distances\ \ of all paired reads" info: null - default: - - "$id.inner_distance_plot.r" - must_exist: false + must_exist: true create_parent: true required: false direction: "output" @@ -150,34 +176,27 @@ resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Calculate inner distance between read pairs. 
\n" +description: "Calculate inner distance between read pairs.\n" test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "test.paired_end.sorted.bam" -- type: "file" - path: "test.bed12" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/innerdistance/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts356" +links: + repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC" + homepage: "https://rseqc.sourceforge.net/" + documentation: "https://rseqc.sourceforge.net/#inner-distance-py" + issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" runners: - type: "executable" id: "executable" @@ -246,14 +265,13 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "python:3.10" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - type: "apt" packages: - - "python3-pip" - "r-base" interactive: false - type: "python" @@ -261,42 +279,43 @@ engines: packages: - "RSeQC" upgrade: true + - type: "docker" + run: + - "echo \"RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' '\ + \ -f2)\" > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" id: "native" build_info: - config: "src/rseqc/rseqc_innerdistance/config.vsh.yaml" + config: "src/rseqc/rseqc_inner_distance/config.vsh.yaml" runner: "nextflow" engine: "docker|native" - output: "target/nextflow/rseqc/rseqc_innerdistance" - executable: "target/nextflow/rseqc/rseqc_innerdistance/main.nf" + output: "target/nextflow/rseqc/rseqc_inner_distance" + executable: 
"target/nextflow/rseqc/rseqc_inner_distance/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/rseqc/rseqc_innerdistance/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf similarity index 96% rename from target/nextflow/rseqc/rseqc_innerdistance/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf index 73a422a..de086a1 100644 --- a/target/nextflow/rseqc/rseqc_innerdistance/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf @@ -1,4 +1,4 @@ -// rseqc_innerdistance main +// 
rseqc_inner_distance main // // This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. +// +// Component authors: +// * Emma Rousseau (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2804,16 +2807,42 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "rseqc_innerdistance", + "name" : "rseqc_inner_distance", "namespace" : "rseqc", "version" : "main", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], "argument_groups" : [ { "name" : "Input", "arguments" : [ { "type" : "file", - "name" : "--input", + "name" : "--input_file", + "alternatives" : [ + "-i" + ], "description" : "input alignment file in BAM or SAM format", "must_exist" : true, "create_parent" : true, @@ -2825,6 +2854,9 @@ meta = [ { "type" : "file", "name" : "--refgene", + "alternatives" : [ + "-r" + ], "description" : "Reference gene model in bed format", "must_exist" : true, "create_parent" : true, @@ -2836,34 +2868,41 @@ meta = [ { "type" : "integer", "name" : "--sample_size", - "description" : "Numer of reads sampled from SAM/BAM file, default = 200000.", - "default" : [ - 200000 + "alternatives" : [ + "-k" + ], + "description" : "Numer of reads sampled from SAM/BAM file, default = 1000000.", + "example" : [ + 1000000 ], "required" : false, - "min" : 1, "direction" : "input", 
"multiple" : false, "multiple_sep" : ";" }, { "type" : "integer", - "name" : "--map_qual", + "name" : "--mapq", + "alternatives" : [ + "-q" + ], "description" : "Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.", - "default" : [ + "example" : [ 30 ], "required" : false, - "min" : 0, "direction" : "input", "multiple" : false, "multiple_sep" : ";" }, { "type" : "integer", - "name" : "--lower_bound_size", + "name" : "--lower_bound", + "alternatives" : [ + "-l" + ], "description" : "Lower bound of inner distance (bp). This option is used for ploting histograme, default=-250.", - "default" : [ + "example" : [ -250 ], "required" : false, @@ -2873,9 +2912,12 @@ meta = [ }, { "type" : "integer", - "name" : "--upper_bound_size", + "name" : "--upper_bound", + "alternatives" : [ + "-u" + ], "description" : "Upper bound of inner distance (bp). This option is used for ploting histograme, default=250.", - "default" : [ + "example" : [ 250 ], "required" : false, @@ -2885,9 +2927,12 @@ meta = [ }, { "type" : "integer", - "name" : "--step_size", + "name" : "--step", + "alternatives" : [ + "-s" + ], "description" : "Step size (bp) of histograme. 
This option is used for plotting histogram, default=5.", - "default" : [ + "example" : [ 5 ], "required" : false, @@ -2900,14 +2945,23 @@ meta = [ { "name" : "Output", "arguments" : [ + { + "type" : "string", + "name" : "--output_prefix", + "alternatives" : [ + "-o" + ], + "description" : "Rrefix of output files.", + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, { "type" : "file", "name" : "--output_stats", "description" : "output file (txt) with summary statistics of inner distances of paired reads", - "default" : [ - "$id.inner_distance.stats" - ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2918,10 +2972,7 @@ meta = [ "type" : "file", "name" : "--output_dist", "description" : "output file (txt) with inner distances of all paired reads", - "default" : [ - "$id.inner_distance.txt" - ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2932,10 +2983,7 @@ meta = [ "type" : "file", "name" : "--output_freq", "description" : "output file (txt) with frequencies of inner distances of all paired reads", - "default" : [ - "$id.inner_distance_freq.txt" - ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2946,10 +2994,7 @@ meta = [ "type" : "file", "name" : "--output_plot", "description" : "output file (pdf) with histogram plot of of inner distances of all paired reads", - "default" : [ - "$id.inner_distance_plot.pdf" - ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2960,10 +3005,7 @@ meta = [ "type" : "file", "name" : "--output_plot_r", "description" : "output file (R) with script of histogram plot of of inner distances of all paired reads", - "default" : [ - "$id.inner_distance_plot.r" - ], - "must_exist" : false, + "must_exist" : true, 
"create_parent" : true, "required" : false, "direction" : "output", @@ -2980,7 +3022,7 @@ meta = [ "is_executable" : true } ], - "description" : "Calculate inner distance between read pairs. \n", + "description" : "Calculate inner distance between read pairs.\n", "test_resources" : [ { "type" : "bash_script", @@ -2989,42 +3031,27 @@ meta = [ }, { "type" : "file", - "path" : "/testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/sarscov2/test.bed12" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/rseqc/innerdistance/main.nf" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts356" + ] + }, + "links" : { + "repository" : "https://github.com/MonashBioinformaticsPlatform/RSeQC", + "homepage" : "https://rseqc.sourceforge.net/", + "documentation" : "https://rseqc.sourceforge.net/#inner-distance-py", + "issue_tracker" : "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues" + }, "runners" : [ { "type" : "executable", @@ -3103,7 +3130,7 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "python:3.10", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", @@ -3111,7 +3138,6 @@ meta = [ { "type" : "apt", "packages" : [ - "python3-pip", "r-base" ], "interactive" : false @@ -3123,6 +3149,12 @@ meta = [ "RSeQC" ], "upgrade" : true + }, + { + "type" : "docker", + "run" : [ + "echo \\"RSeQC - inner_distance.py: $(inner_distance.py 
--version | cut -d' ' -f2)\\" > /var/software_versions.txt\n" + ] } ] }, @@ -3132,49 +3164,39 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/rseqc/rseqc_innerdistance/config.vsh.yaml", + "config" : "/workdir/root/repo/src/rseqc/rseqc_inner_distance/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_innerdistance", + "output" : "target/nextflow/rseqc/rseqc_inner_distance", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : 
"vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3190,13 +3212,14 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT_FILE+x} ]; then echo "${VIASH_PAR_INPUT_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_file='&'#" ; else echo "# par_input_file="; fi ) $( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) $( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) -$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) -$( if [ ! -z ${VIASH_PAR_LOWER_BOUND_SIZE+x} ]; then echo "${VIASH_PAR_LOWER_BOUND_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lower_bound_size='&'#" ; else echo "# par_lower_bound_size="; fi ) -$( if [ ! -z ${VIASH_PAR_UPPER_BOUND_SIZE+x} ]; then echo "${VIASH_PAR_UPPER_BOUND_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_upper_bound_size='&'#" ; else echo "# par_upper_bound_size="; fi ) -$( if [ ! -z ${VIASH_PAR_STEP_SIZE+x} ]; then echo "${VIASH_PAR_STEP_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_step_size='&'#" ; else echo "# par_step_size="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPQ+x} ]; then echo "${VIASH_PAR_MAPQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapq='&'#" ; else echo "# par_mapq="; fi ) +$( if [ ! -z ${VIASH_PAR_LOWER_BOUND+x} ]; then echo "${VIASH_PAR_LOWER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_lower_bound='&'#" ; else echo "# par_lower_bound="; fi ) +$( if [ ! 
-z ${VIASH_PAR_UPPER_BOUND+x} ]; then echo "${VIASH_PAR_UPPER_BOUND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_upper_bound='&'#" ; else echo "# par_upper_bound="; fi ) +$( if [ ! -z ${VIASH_PAR_STEP+x} ]; then echo "${VIASH_PAR_STEP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_step='&'#" ; else echo "# par_step="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_PREFIX+x} ]; then echo "${VIASH_PAR_OUTPUT_PREFIX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_prefix='&'#" ; else echo "# par_output_prefix="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT_DIST+x} ]; then echo "${VIASH_PAR_OUTPUT_DIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dist='&'#" ; else echo "# par_output_dist="; fi ) $( if [ ! -z ${VIASH_PAR_OUTPUT_FREQ+x} ]; then echo "${VIASH_PAR_OUTPUT_FREQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_freq='&'#" ; else echo "# par_output_freq="; fi ) @@ -3226,25 +3249,27 @@ $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -exo pipefail -prefix=\\$(openssl rand -hex 8) inner_distance.py \\\\ - -i \\$par_input \\\\ + -i \\$par_input_file \\\\ -r \\$par_refgene \\\\ - -o \\$prefix \\\\ - -k \\$par_sample_size \\\\ - -l \\$par_lower_bound_size \\\\ - -u \\$par_upper_bound_size \\\\ - -s \\$par_step_size \\\\ - -q \\$par_map_qual \\\\ + -o \\$par_output_prefix \\\\ + \\${par_sample_size:+-k "\\${par_sample_size}"} \\\\ + \\${par_lower_bound:+-l "\\${par_lower_bound}"} \\\\ + \\${par_upper_bound:+-u "\\${par_upper_bound}"} \\\\ + \\${par_step:+-s "\\${par_step}"} \\\\ + \\${par_mapq:+-q "\\${par_mapq}"} \\\\ > stdout.txt -head -n 2 stdout.txt > \\$par_output_stats +if [[ -n \\$par_output_stats ]]; then head -n 2 stdout.txt > \\$par_output_stats; fi -[[ -f "\\$prefix.inner_distance.txt" ]] && mv \\$prefix.inner_distance.txt \\$par_output_dist -[[ -f "\\$prefix.inner_distance_plot.pdf" ]] && mv \\$prefix.inner_distance_plot.pdf \\$par_output_plot -[[ -f "\\$prefix.inner_distance_plot.r" ]] && mv \\$prefix.inner_distance_plot.r \\$par_output_plot_r -[[ -f "\\$prefix.inner_distance_freq.txt" ]] && mv \\$prefix.inner_distance_freq.txt \\$par_output_freq + +[[ -n "\\$par_output_dist" && -f "\\$par_output_prefix.inner_distance.txt" ]] && mv \\$par_output_prefix.inner_distance.txt \\$par_output_dist +[[ -n "\\$par_output_plot" && -f "\\$par_output_prefix.inner_distance_plot.pdf" ]] && mv \\$par_output_prefix.inner_distance_plot.pdf \\$par_output_plot +[[ -n "\\$par_output_plot_r" && -f "\\$par_output_prefix.inner_distance_plot.r" ]] && mv \\$par_output_prefix.inner_distance_plot.r \\$par_output_plot_r +[[ -n "\\$par_output_freq" && -f "\\$par_output_prefix.inner_distance_freq.txt" ]] && mv \\$par_output_prefix.inner_distance_freq.txt \\$par_output_freq + +exit 0 VIASHMAIN bash "$tempscript" ''' @@ -3605,7 +3630,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - 
"image" : "vsh/rnaseq/rseqc/rseqc_innerdistance", + "image" : "vsh/biobox/rseqc/rseqc_inner_distance", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/kallisto/kallisto_quant/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow.config similarity index 96% rename from target/nextflow/kallisto/kallisto_quant/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow.config index 9648f48..b7c9979 100644 --- a/target/nextflow/kallisto/kallisto_quant/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow.config @@ -1,9 +1,10 @@ manifest { - name = 'kallisto/kallisto_quant' + name = 'rseqc/rseqc_inner_distance' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'Computes equivalence classes for reads and quantifies abundances.\n' + description = 'Calculate inner distance between read pairs.\n' + author = 'Emma Rousseau' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/rseqc/rseqc_innerdistance/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json similarity index 69% rename from target/nextflow/rseqc/rseqc_innerdistance/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json index 85f21e3..34e9318 100644 --- a/target/nextflow/rseqc/rseqc_innerdistance/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", -"title": "rseqc_innerdistance", -"description": "Calculate inner distance between read pairs. 
\n", +"title": "rseqc_inner_distance", +"description": "Calculate inner distance between read pairs.\n", "type": "object", "definitions": { @@ -14,7 +14,7 @@ "properties": { - "input": { + "input_file": { "type": "string", "description": "Type: `file`, required. input alignment file in BAM or SAM format", @@ -37,54 +37,49 @@ "sample_size": { "type": "integer", - "description": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file, default = 200000", - "help_text": "Type: `integer`, default: `200000`. Numer of reads sampled from SAM/BAM file, default = 200000." - , - "default":200000 + "description": "Type: `integer`, example: `1000000`. Numer of reads sampled from SAM/BAM file, default = 1000000", + "help_text": "Type: `integer`, example: `1000000`. Numer of reads sampled from SAM/BAM file, default = 1000000." + } , - "map_qual": { + "mapq": { "type": "integer", - "description": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", - "help_text": "Type: `integer`, default: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." - , - "default":30 + "description": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30", + "help_text": "Type: `integer`, example: `30`. Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30." + } , - "lower_bound_size": { + "lower_bound": { "type": "integer", - "description": "Type: `integer`, default: `-250`. Lower bound of inner distance (bp)", - "help_text": "Type: `integer`, default: `-250`. Lower bound of inner distance (bp). This option is used for ploting histograme, default=-250." - , - "default":-250 + "description": "Type: `integer`, example: `-250`. Lower bound of inner distance (bp)", + "help_text": "Type: `integer`, example: `-250`. Lower bound of inner distance (bp). 
This option is used for ploting histograme, default=-250." + } , - "upper_bound_size": { + "upper_bound": { "type": "integer", - "description": "Type: `integer`, default: `250`. Upper bound of inner distance (bp)", - "help_text": "Type: `integer`, default: `250`. Upper bound of inner distance (bp). This option is used for ploting histograme, default=250." - , - "default":250 + "description": "Type: `integer`, example: `250`. Upper bound of inner distance (bp)", + "help_text": "Type: `integer`, example: `250`. Upper bound of inner distance (bp). This option is used for ploting histograme, default=250." + } , - "step_size": { + "step": { "type": "integer", - "description": "Type: `integer`, default: `5`. Step size (bp) of histograme", - "help_text": "Type: `integer`, default: `5`. Step size (bp) of histograme. This option is used for plotting histogram, default=5." - , - "default":5 + "description": "Type: `integer`, example: `5`. Step size (bp) of histograme", + "help_text": "Type: `integer`, example: `5`. Step size (bp) of histograme. This option is used for plotting histogram, default=5." + } @@ -99,13 +94,23 @@ "properties": { + "output_prefix": { + "type": + "string", + "description": "Type: `string`, required. Rrefix of output files", + "help_text": "Type: `string`, required. Rrefix of output files." + + } + + + , "output_stats": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_stats.stats`. output file (txt) with summary statistics of inner distances of paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_stats.stats`. output file (txt) with summary statistics of inner distances of paired reads" + "description": "Type: `file`, default: `$id.$key.output_stats.output_stats`. output file (txt) with summary statistics of inner distances of paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_stats.output_stats`. 
output file (txt) with summary statistics of inner distances of paired reads" , - "default":"$id.$key.output_stats.stats" + "default": "$id.$key.output_stats.output_stats" } @@ -113,10 +118,10 @@ "output_dist": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dist.txt`. output file (txt) with inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_dist.txt`. output file (txt) with inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_dist.output_dist`. output file (txt) with inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_dist.output_dist`. output file (txt) with inner distances of all paired reads" , - "default":"$id.$key.output_dist.txt" + "default": "$id.$key.output_dist.output_dist" } @@ -124,10 +129,10 @@ "output_freq": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_freq.txt`. output file (txt) with frequencies of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_freq.output_freq`. output file (txt) with frequencies of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_freq.output_freq`. output file (txt) with frequencies of inner distances of all paired reads" , - "default":"$id.$key.output_freq.txt" + "default": "$id.$key.output_freq.output_freq" } @@ -135,10 +140,10 @@ "output_plot": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot.pdf`. output file (pdf) with histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_plot.pdf`. 
output file (pdf) with histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_plot.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot.output_plot`. output file (pdf) with histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.output_plot.pdf" + "default": "$id.$key.output_plot.output_plot" } @@ -146,10 +151,10 @@ "output_plot_r": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads", - "help_text": "Type: `file`, default: `$id.$key.output_plot_r.r`. output file (R) with script of histogram plot of of inner distances of all paired reads" + "description": "Type: `file`, default: `$id.$key.output_plot_r.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads", + "help_text": "Type: `file`, default: `$id.$key.output_plot_r.output_plot_r`. output file (R) with script of histogram plot of of inner distances of all paired reads" , - "default":"$id.$key.output_plot_r.r" + "default": "$id.$key.output_plot_r.output_plot_r" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml new file mode 100644 index 0000000..4b8fc1f --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/.config.vsh.yaml @@ -0,0 +1,621 @@ +name: "sortmerna" +version: "main" +argument_groups: +- name: "Input" + arguments: + - type: "boolean_true" + name: "--paired" + description: "Reads are paired-end. 
If a single reads file is provided, use this\ + \ option \nto indicate the file contains interleaved paired reads when neither\n\ + 'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n" + info: null + direction: "input" + - type: "file" + name: "--input" + description: "Input fastq" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ref" + description: "Reference fasta file(s) for rRNA database." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "file" + name: "--ribo_database_manifest" + description: "Text file containing paths to fasta files (one per line) that will\ + \ be used to create the database for SortMeRNA." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Output" + arguments: + - type: "file" + name: "--log" + description: "Sortmerna log file." + info: null + example: + - "$id.sortmerna.log" + must_exist: false + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output" + alternatives: + - "--aligned" + description: "Directory and file prefix for aligned output. The appropriate extension:\ + \ \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified,\ + \ the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the\ + \ prefix 'aligned' is used.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--other" + description: "Create Non-aligned reads output file with this path/prefix. Must\ + \ be used with fastx." 
+ info: null + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "string" + name: "--kvdb" + description: "Path to directory of the key-value database file, used for storing\ + \ the alignment results." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--idx_dir" + description: "Path to the directory for storing the reference index files." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--readb" + description: "Path to the directory for storing pre-processed reads." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--fastx" + description: "Output aligned reads into FASTA/FASTQ file" + info: null + direction: "input" + - type: "boolean_true" + name: "--sam" + description: "Output SAM alignment for aligned reads." + info: null + direction: "input" + - type: "boolean_true" + name: "--sq" + description: "Add SQ tags to the SAM file" + info: null + direction: "input" + - type: "string" + name: "--blast" + description: "Blast options:\n* '0' - pairwise\n* '1' \ + \ - tabular(Blast - m 8 format)\n* '1 cigar' - tabular\ + \ + column for CIGAR\n* '1 cigar qcov' - tabular + columns for CIGAR\ + \ and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR,\ + \ query coverage and strand\n" + info: null + required: false + choices: + - "0" + - "1" + - "1 cigar" + - "1 cigar qcov" + - "1 cigar qcov qstrand" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--num_alignments" + description: "Report first INT alignments per read reaching E-value. If Int =\ + \ 0, all alignments will be output. 
Default: '0'\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_lis" + description: "search all alignments having the first INT longest LIS. LIS stands\ + \ for Longest Increasing Subsequence, it is\ncomputed using seeds' positions\ + \ to expand hits into longer matches prior to Smith-Waterman alignment. Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--print_all_reads" + description: "output null alignment strings for non-aligned reads to SAM and/or\ + \ BLAST tabular files." + info: null + direction: "input" + - type: "boolean_true" + name: "--paired_in" + description: "In the case where a pair of reads is aligned with a score above\ + \ the threshold, the output of the reads is controlled\nby the following options:\n\ + * --paired_in and --paired_out are both false: Only one read per pair is output\ + \ to the aligned fasta file.\n* --paired_in is true and --paired_out is false:\ + \ Both reads of the pair are output to the aligned fasta file.\n* --paired_in\ + \ is false and --paired_out is true: Both reads are output the the other fasta\ + \ file (if it is specified).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--paired_out" + description: "See description of --paired_in." + info: null + direction: "input" + - type: "boolean_true" + name: "--out2" + description: "Output paired reads into separate files. Must be used with '--fastx'.\ + \ If a single reads file is provided, this options\nimplies interleaved paired\ + \ reads. When used with 'sout', four (4) output files for aligned reads will\ + \ be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd,\ + \ aligned-singleton-rev'. 
If 'other' option is also used,\neight (8) output\ + \ files will be generated.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--sout" + description: "Separate paired and singleton aligned reads. Must be used with '--fastx'.\ + \ If a single reads file is provided,\nthis options implies interleaved paired\ + \ reads. Cannot be used with '--paired_in' or '--paired_out'.\n" + info: null + direction: "input" + - type: "string" + name: "--zip_out" + description: "Compress the output files. The possible values are: \n* '1/true/t/yes/y'\n\ + * '0/false/f/no/n'\n*'-1' (the same format as input - default)\nThe values are\ + \ Not case sensitive.\n" + info: null + example: + - "-1" + required: false + choices: + - "1" + - "true" + - "t" + - "yes" + - "y" + - "0" + - "false" + - "f" + - "no" + - "n" + - "-1" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--match" + description: "Smith-Waterman score for a match (positive integer). Default: '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--mismatch" + description: "Smith-Waterman penalty for a mismatch (negative integer). Default:\ + \ '-3'.\n" + info: null + example: + - -3 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gap_open" + description: "Smith-Waterman penalty for introducing a gap (positive integer).\ + \ Default: '5'.\n" + info: null + example: + - 5 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--gap_ext" + description: "Smith-Waterman penalty for extending a gap (positive integer). 
Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--N" + description: "Smith-Waterman penalty for ambiguous letters (N's) scored as --mismatch.\ + \ Default: '-1'.\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--a" + description: "Number of threads to use. Default: '1'.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--e" + description: "E-value threshold. Default: '1'.\n" + info: null + example: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--F" + description: "Search only the forward strand." + info: null + direction: "input" + - type: "boolean_true" + name: "--R" + description: "Search only the reverse-complementary strand." + info: null + direction: "input" + - type: "integer" + name: "--num_alignment" + description: "Report first INT alignments per read reaching E-value (--num_alignments\ + \ 0 signifies all alignments will be output).\nDefault: '-1'\n" + info: null + example: + - -1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--best" + description: "Report INT best alignments per read reaching E-value by searching\ + \ --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments\ + \ will be searched) Default: '1'.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--verbose" + alternatives: + - "-v" + description: "Verbose output." 
+ info: null + direction: "input" +- name: "OTU picking options" + arguments: + - type: "double" + name: "--id" + description: "%id similarity threshold (the alignment must still pass the E-value\ + \ threshold). Default: '0.97'.\n" + info: null + example: + - 0.97 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--coverage" + description: "%query coverage threshold (the alignment must still pass the E-value\ + \ threshold). Default: '0.97'.\n" + info: null + example: + - 0.97 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--de_novo" + description: "FASTA/FASTQ file for reads matching database < %id off (set using\ + \ --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value\ + \ threshold).\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--otu_map" + description: "Output OTU map (input to QIIME's make_otu_table.py).\n" + info: null + direction: "input" +- name: "Advanced options" + arguments: + - type: "integer" + name: "--num_seed" + description: "Number of seeds matched before searching for candidate LIS. Default:\ + \ '2'.\n" + info: null + example: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--passes" + description: "Three intervals at which to place the seed on the read L,L/2,3 (L\ + \ is the seed length set in ./indexdb_rna).\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--edge" + description: "The number (or percentage if followed by %) of nucleotides to add\ + \ to each edge of the alignment region on the\nreference sequence before performing\ + \ Smith-Waterman alignment. 
Default: '4'.\n" + info: null + example: + - "4" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--full_search" + description: "Search for all 0-error and 1-error seed off matches in the index\ + \ rather than stopping after finding a 0-error match\n(<1% gain in sensitivity\ + \ with up four-fold decrease in speed).\n" + info: null + direction: "input" +- name: "Indexing Options" + arguments: + - type: "integer" + name: "--index" + description: "Create index files for the reference database. By default when this\ + \ option is not used, the program checks the\nreference index and builds it\ + \ if not already existing.\nThis can be changed by using '-index' as follows:\n\ + * '-index 0' - skip indexing. If the index does not exist, the program will\ + \ terminate\n and warn to build the index prior performing\ + \ the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index\ + \ 2' - the default behaviour, the same as when not using this option at all\n" + info: null + example: + - 2 + required: false + choices: + - 0 + - 1 + - 2 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "-L" + description: "Indexing seed length. Default: '18'\n" + info: null + example: + - 18.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--interval" + description: "Index every Nth L-mer in the reference database. Default: '1'\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_pos" + description: "Maximum number of positions to store for each unique L-mer. Set\ + \ to 0 to store all positions. 
Default: '1000'\n" + info: null + example: + - 1000 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Local sequence alignment tool for filtering, mapping and clustering.\ + \ The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic\ + \ data.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "sort" +- "mRNA" +- "rRNA" +- "alignment" +- "filtering" +- "mapping" +- "clustering" +license: "GPL-3.0" +references: + doi: + - "10.1093/bioinformatics/bts611" +links: + repository: "https://github.com/sortmerna/sortmerna" + homepage: "https://sortmerna.readthedocs.io/en/latest/" + documentation: "https://sortmerna.readthedocs.io/en/latest/manual4.0.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 
4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \\([0-9]\\+\\.[0-9]\\\ + +\\.[0-9]\\+\\).*/\\1/p'`\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/sortmerna/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/sortmerna" + executable: "target/nextflow/sortmerna/main.nf" + viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://github.com/viash-hub/biobox" # access token removed; never embed credentials in the remote URL + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: "biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + -
".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf new file mode 100644 index 0000000..9bafb9d --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf @@ -0,0 +1,4152 @@ +// sortmerna main +// +// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. 
Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? 
null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? 
null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] 
+ } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets; fails with an assertion error listing all ids otherwise. + * + * @param parameterSets a list of parameter sets; the first element of each entry is its id. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" // unique(false) works on a copy, so the message still shows the duplicated ids +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. 
+ */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from a List of Maps to a List of [id, Map] pairs by adding the ID as first element of each pair + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the param_list argument which allows a user to initialise + * a Vdsl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_list are: a csv file, + * json file, a yaml file or a yaml blob. Each parameter set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_list are: a csv file, + * json file, a yaml file or a yaml blob. Each parameter set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple whose + * first element is the ID of the event and whose second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { // filters events on their id (tup[0]): a repeated id is an error, or is dropped with a warning + def stopOnError = args.stopOnError == null ? true : args.stopOnError // defaults to true; an explicit args.stopOnError value is honoured + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). 
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameters to the input parameters. + * - Do some assertions: + * ~ Check that 'config' is a Map (NOTE(review): no id-uniqueness check is visible in this function). + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided; it is used to identify the module + * in argument type error messages (defaults to config.name). + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and applies a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
+ * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? 
filter_ch | map{tup -> + // def new_id = id_(tup[0], tup[1], comp_config) + def new_id = tup[0] + if (id_ instanceof String) { + new_id = id_ + } else if (id_ instanceof Closure) { + new_id = id_(new_id, tup[1], comp_config) + } + [new_id] + tup.drop(1) + } + : filter_ch + def data_ch = id_ch | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_config) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_config) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + post_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runComponentsWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' +/** + * Run a list of components on a stream of data. + * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component itself. 
+ * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component itself. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component itself. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component itself. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runEach(Map args) { + assert args.components: "runEach should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runEach" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def runIf_ = args.runIf + def id_ = args.id + + assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." + + workflow runEachWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_) + } + : input_ch + def id_ch = id_ + ? filter_ch | map{tup -> + def new_id = id_ + if (new_id instanceof Closure) { + new_id = new_id(tup[0], tup[1], comp_) + } + assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. 
Found: ${new_id.getClass()}" + [new_id] + tup.drop(1) + } + : filter_ch + def chPassthrough = null + def chRun = null + if (runIf_) { + def idRunIfBranch = id_ch.branch{ tup -> + run: runIf_(tup[0], tup[1], comp_) + passthrough: true + } + chPassthrough = idRunIfBranch.passthrough + chRun = idRunIfBranch.run + } else { + chRun = id_ch + chPassthrough = Channel.empty() + } + def data_ch = chRun | map{tup -> + def new_data = tup[1] + if (fromState_ instanceof Map) { + new_data = fromState_.collectEntries{ key0, key1 -> + [key0, new_data[key1]] + } + } else if (fromState_ instanceof List) { + new_data = fromState_.collectEntries{ key -> + [key, new_data[key]] + } + } else if (fromState_ instanceof Closure) { + new_data = fromState_(tup[0], new_data, comp_) + } + tup.take(1) + [new_data] + tup.drop(1) + } + def out_ch = data_ch + | comp_.run( + auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] + ) + def post_ch = toState_ + ? out_ch | map{tup -> + def output = tup[1] + def old_state = tup[2] + def new_state = null + if (toState_ instanceof Map) { + new_state = old_state + toState_.collectEntries{ key0, key1 -> + [key0, output[key1]] + } + } else if (toState_ instanceof List) { + new_state = old_state + toState_.collectEntries{ key -> + [key, output[key]] + } + } else if (toState_ instanceof Closure) { + new_state = toState_(tup[0], output, old_state, comp_) + } + [tup[0], new_state] + tup.drop(3) + } + : out_ch + + def return_ch = post_ch + | concat(chPassthrough) + + return_ch + } + + // mix all results + output_ch = + (out_chs.size == 1) + ? out_chs[0] + : out_chs[0].mix(*out_chs.drop(1)) + + emit: output_ch + } + + return runEachWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' +/** + * Join sourceChannel to targetChannel + * + * This function joins the sourceChannel to the targetChannel. + * However, each id in the targetChannel must be present in the + * sourceChannel. 
If _meta.join_id exists in the targetChannel, that is + * used as an id instead. If the id doesn't match any id in the sourceChannel, + * an error is thrown. + */ + +def safeJoin(targetChannel, sourceChannel, key) { + def sourceIDs = new IDChecker() + + def sourceCheck = sourceChannel + | map { tup -> + sourceIDs.observe(tup[0]) + tup + } + def targetCheck = targetChannel + | map { tup -> + def id = tup[0] + + if (!sourceIDs.contains(id)) { + error ( + "Error in module '${key}' when merging output with original state.\n" + + " Reason: output with id '${id}' could not be joined with source channel.\n" + + " If the IDs in the output channel differ from the input channel,\n" + + " please set `tup[1]._meta.join_id to the original ID.\n" + + " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + + " Unexpected ID in the output channel: '${id}'.\n" + + " Example input event: [\"id\", [input: file(...)]],\n" + + " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" + ) + } + // TODO: add link to our documentation on how to fix this + + tup + } + + sourceCheck.cross(targetChannel) + | map{ left, right -> + right + left.drop(1) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' +def _processArgument(arg) { + arg.multiple = arg.multiple != null ? arg.multiple : false + arg.required = arg.required != null ? arg.required : false + arg.direction = arg.direction != null ? arg.direction : "input" + arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" + arg.plainName = arg.name.replaceAll("^-*", "") + + if (arg.type == "file") { + arg.must_exist = arg.must_exist != null ? arg.must_exist : true + arg.create_parent = arg.create_parent != null ? arg.create_parent : true + } + + // add default values to output files which haven't already got a default + if (arg.type == "file" && arg.direction == "output" && arg.default == null) { + def mult = arg.multiple ? 
"_*" : "" + def extSearch = "" + if (arg.default != null) { + extSearch = arg.default + } else if (arg.example != null) { + extSearch = arg.example + } + if (extSearch instanceof List) { + extSearch = extSearch[0] + } + def extSearchResult = extSearch.find("\\.[^\\.]+\$") + def ext = extSearchResult != null ? extSearchResult : "" + arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" + if (arg.multiple) { + arg.default = [arg.default] + } + } + + if (!arg.multiple) { + if (arg.default != null && arg.default instanceof List) { + arg.default = arg.default[0] + } + if (arg.example != null && arg.example instanceof List) { + arg.example = arg.example[0] + } + } + + if (arg.type == "boolean_true") { + arg.default = false + } + if (arg.type == "boolean_false") { + arg.default = true + } + + arg +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' +def addGlobalArguments(config) { + def localConfig = [ + "argument_groups": [ + [ + "name": "Nextflow input-output arguments", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "arguments" : [ + [ + 'name': '--publish_dir', + 'required': true, + 'type': 'string', + 'description': 'Path to an output directory.', + 'example': 'output/', + 'multiple': false + ], + [ + 'name': '--param_list', + 'required': false, + 'type': 'string', + 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. + | + |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. + |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. + |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. + |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. + | + |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), + 'example': 'my_params.yaml', + 'multiple': false, + 'hidden': true + ] + // TODO: allow multiple: true in param_list? + // TODO: allow to specify a --param_list_regex to filter the param_list? + // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? + ] + ] + ] + ] + + return processConfig(_mergeMap(config, localConfig)) +} + +def _mergeMap(Map lhs, Map rhs) { + return rhs.inject(lhs.clone()) { map, entry -> + if (map[entry.key] instanceof Map && entry.value instanceof Map) { + map[entry.key] = _mergeMap(map[entry.key], entry.value) + } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { + map[entry.key] += entry.value + } else { + map[entry.key] = entry.value + } + return map + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' +def _generateArgumentHelp(param) { + // alternatives are not supported + // def names = param.alternatives ::: List(param.name) + + def unnamedProps = [ + ["required parameter", param.required], + ["multiple values allowed", param.multiple], + ["output", param.direction.toLowerCase() == "output"], + ["file must exist", param.type == "file" && param.must_exist] + ].findAll{it[1]}.collect{it[0]} + + def dflt = null + if (param.default != null) { + if 
(param.default instanceof List) { + dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + dflt = param.default.toString() + } + } + def example = null + if (param.example != null) { + if (param.example instanceof List) { + example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") + } else { + example = param.example.toString() + } + } + def min = param.min?.toString() + def max = param.max?.toString() + + def escapeChoice = { choice -> + def s1 = choice.replaceAll("\\n", "\\\\n") + def s2 = s1.replaceAll("\"", """\\\"""") + s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 + } + def choices = param.choices == null ? + null : + "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" + + def namedPropsStr = [ + ["type", ([param.type] + unnamedProps).join(", ")], + ["default", dflt], + ["example", example], + ["choices", choices], + ["min", min], + ["max", max] + ] + .findAll{it[1]} + .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} + .join("") + + def descStr = param.description == null ? + "" : + _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") + + "\n --" + param.plainName + + namedPropsStr + + descStr +} + +// Based on Helper.generateHelp() in Helper.scala +def _generateHelp(config) { + def fun = config + + // PART 1: NAME AND VERSION + def nameStr = fun.name + + (fun.version == null ? "" : " " + fun.version) + + // PART 2: DESCRIPTION + def descrStr = fun.description == null ? + "" : + "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") + + // PART 3: Usage + def usageStr = fun.usage == null ? + "" : + "\n\nUsage:\n" + fun.usage.trim() + + // PART 4: Options + def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> + def name = argGroup.name + def descriptionStr = argGroup.description == null ? 
+ "" : + "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" + def arguments = argGroup.arguments.collect{arg -> + arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg + }.findAll{it != null} + def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} + + "\n\n$name:" + + descriptionStr + + argumentStrs.join("\n") + } + + // FINAL: combine + def out = nameStr + + descrStr + + usageStr + + argGroupStrs.join("") + + return out +} + +// based on Format._paragraphWrap +def _paragraphWrap(str, maxLength) { + def outLines = [] + str.split("\n").each{par -> + def words = par.split("\\s").toList() + + def word = null + def line = words.pop() + while(!words.isEmpty()) { + word = words.pop() + if (line.length() + word.length() + 1 <= maxLength) { + line = line + " " + word + } else { + outLines.add(line) + line = word + } + } + if (words.isEmpty()) { + outLines.add(line) + } + } + return outLines +} + +def helpMessage(config) { + if (params.containsKey("help") && params.help) { + def mergedConfig = addGlobalArguments(config) + def helpStr = _generateHelp(mergedConfig) + println(helpStr) + exit 0 + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' +def processConfig(config) { + // set defaults for arguments + config.arguments = + (config.arguments ?: []).collect{_processArgument(it)} + + // set defaults for argument_group arguments + config.argument_groups = + (config.argument_groups ?: []).collect{grp -> + grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} + grp + } + + // create combined arguments list + config.allArguments = + config.arguments + + config.argument_groups.collectMany{it.arguments} + + // add missing argument groups (based on Functionality::allArgumentGroups()) + def argGroups = config.argument_groups + if (argGroups.any{it.name.toLowerCase() == "arguments"}) { + argGroups = argGroups.collect{ grp -> + if (grp.name.toLowerCase() == 
"arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. 
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + 
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." 
+ key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{[yamlFile] + outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ +mkdir -p "\$(dirname '${yamlFile}')" +echo "Storing state as yaml" +echo '${yamlBlob}' > '${yamlFile}' +echo "Copying output files to destination folder" +${copyCommands.join("\n ")} +""" +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + 
.replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +/** + * Assert that a value is a Map that only has allowed keys and has every required key. + * @param map The value to check; must be a Map. + * @param expectedKeys The keys that are allowed to occur in the map. + * @param requiredKeys The keys that must be present in the map. + * @param mapName Name of the map, used in the assertion messages. + */ +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the key that is actually missing; 'key' is not defined inside this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source 
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +/** + * Validate the 'fromState' workflow argument and normalise it. + * A null value is returned as null and a Closure is returned unchanged. + * A List of strings is treated as a Map that maps every name onto itself. + * A Map [argumentName: stateKey] is turned into a Closure that reads the state from element 1 of the tuple and returns a Map of the arguments found in that state. + * @param fromState null, a Closure, a Map of strings or a List of strings. + * @param key_ The module key, used in error messages. + * @param config_ The component config; its allArguments are used to find the required input arguments. + */ +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // use the config passed in by the caller; directions are compared in lowercase like elsewhere in this file + def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(newkey)) { + [] + } else { + throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +/** + * Validate the 'toState' workflow argument and normalise it to a Closure. + * A null value becomes a Closure that returns element 1 of the tuple, and a Closure is returned unchanged. + * A List of strings is treated as a Map that maps every name onto itself. + * A Map [stateKey: outputName] is turned into a Closure that reads the output from element 1 and the state from element 2 of the tuple and returns the state extended with the mapped outputs. + * @param toState null, a Closure, a Map of strings or a List of strings. + * @param key_ The module key, used in error messages. + * @param config_ The component config; its allArguments are used to find the required output arguments. + */ +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // directions are compared in lowercase like elsewhere in this file + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether origkey corresponds to a required output argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? 
+ chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? 
+ output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] 
+ tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "sortmerna", + "version" : "main", + "argument_groups" : [ + { + "name" : "Input", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "Reads are paired-end. If a single reads file is provided, use this option \nto indicate the file contains interleaved paired reads when neither\n'paired_in' | 'paired_out' | 'out2' | 'sout' are specified.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--input", + "description" : "Input fastq", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ref", + "description" : "Reference fasta file(s) for rRNA database.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--ribo_database_manifest", + "description" : "Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Output", + "arguments" : [ + { + "type" : "file", + "name" : "--log", + "description" : "Sortmerna log file.", + "example" : [ + "$id.sortmerna.log" + ], + "must_exist" : 
false, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--aligned" + ], + "description" : "Directory and file prefix for aligned output. The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf 'dir' is not specified, the output is created in the WORKDIR/out/.\nIf 'pfx' is not specified, the prefix 'aligned' is used.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--other", + "description" : "Create Non-aligned reads output file with this path/prefix. Must be used with fastx.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "string", + "name" : "--kvdb", + "description" : "Path to directory of the key-value database file, used for storing the alignment results.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--idx_dir", + "description" : "Path to the directory for storing the reference index files.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--readb", + "description" : "Path to the directory for storing pre-processed reads.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--fastx", + "description" : "Output aligned reads into FASTA/FASTQ file", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sam", + "description" : "Output SAM alignment for aligned reads.", + "direction" : "input" + }, + { + "type" : 
"boolean_true", + "name" : "--sq", + "description" : "Add SQ tags to the SAM file", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--blast", + "description" : "Blast options:\n* '0' - pairwise\n* '1' - tabular(Blast - m 8 format)\n* '1 cigar' - tabular + column for CIGAR\n* '1 cigar qcov' - tabular + columns for CIGAR and query coverage\n* '1 cigar qcov qstrand' - tabular + columns for CIGAR, query coverage and strand\n", + "required" : false, + "choices" : [ + "0", + "1", + "1 cigar", + "1 cigar qcov", + "1 cigar qcov qstrand" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--num_alignments", + "description" : "Report first INT alignments per read reaching E-value. If Int = 0, all alignments will be output. Default: '0'\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_lis", + "description" : "search all alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is\ncomputed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. 
Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--print_all_reads", + "description" : "output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired_in", + "description" : "In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file.\n* --paired_in is true and --paired_out is false: Both reads of the pair are output to the aligned fasta file.\n* --paired_in is false and --paired_out is true: Both reads are output the the other fasta file (if it is specified).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired_out", + "description" : "See description of --paired_in.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--out2", + "description" : "Output paired reads into separate files. Must be used with '--fastx'. If a single reads file is provided, this options\nimplies interleaved paired reads. When used with 'sout', four (4) output files for aligned reads will be generated:\n'aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd, aligned-singleton-rev'. If 'other' option is also used,\neight (8) output files will be generated.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--sout", + "description" : "Separate paired and singleton aligned reads. Must be used with '--fastx'. If a single reads file is provided,\nthis options implies interleaved paired reads. Cannot be used with '--paired_in' or '--paired_out'.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--zip_out", + "description" : "Compress the output files. 
The possible values are: \n* '1/true/t/yes/y'\n* '0/false/f/no/n'\n*'-1' (the same format as input - default)\nThe values are Not case sensitive.\n", + "example" : [ + "-1" + ], + "required" : false, + "choices" : [ + "1", + "true", + "t", + "yes", + "y", + "0", + "false", + "f", + "no", + "n", + "-1" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--match", + "description" : "Smith-Waterman score for a match (positive integer). Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--mismatch", + "description" : "Smith-Waterman penalty for a mismatch (negative integer). Default: '-3'.\n", + "example" : [ + -3 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gap_open", + "description" : "Smith-Waterman penalty for introducing a gap (positive integer). Default: '5'.\n", + "example" : [ + 5 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--gap_ext", + "description" : "Smith-Waterman penalty for extending a gap (positive integer). Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--N", + "description" : "Smith-Waterman penalty for ambiguous letters (N's) scored as --mismatch. Default: '-1'.\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--a", + "description" : "Number of threads to use. 
Default: '1'.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--e", + "description" : "E-value threshold. Default: '1'.\n", + "example" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--F", + "description" : "Search only the forward strand.", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--R", + "description" : "Search only the reverse-complementary strand.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--num_alignment", + "description" : "Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output).\nDefault: '-1'\n", + "example" : [ + -1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--best", + "description" : "Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: '1'.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Verbose output.", + "direction" : "input" + } + ] + }, + { + "name" : "OTU picking options", + "arguments" : [ + { + "type" : "double", + "name" : "--id", + "description" : "%id similarity threshold (the alignment must still pass the E-value threshold). Default: '0.97'.\n", + "example" : [ + 0.97 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--coverage", + "description" : "%query coverage threshold (the alignment must still pass the E-value threshold). 
Default: '0.97'.\n", + "example" : [ + 0.97 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--de_novo", + "description" : "FASTA/FASTQ file for reads matching database < %id off (set using --id) and < %cov (set using --coverage)\n(alignment must still pass the E-value threshold).\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--otu_map", + "description" : "Output OTU map (input to QIIME's make_otu_table.py).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Advanced options", + "arguments" : [ + { + "type" : "integer", + "name" : "--num_seed", + "description" : "Number of seeds matched before searching for candidate LIS. Default: '2'.\n", + "example" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--passes", + "description" : "Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ./indexdb_rna).\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--edge", + "description" : "The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment. 
Default: '4'.\n", + "example" : [ + "4" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--full_search", + "description" : "Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(<1% gain in sensitivity with up four-fold decrease in speed).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Indexing Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--index", + "description" : "Create index files for the reference database. By default when this option is not used, the program checks the\nreference index and builds it if not already existing.\nThis can be changed by using '-index' as follows:\n* '-index 0' - skip indexing. If the index does not exist, the program will terminate\n and warn to build the index prior performing the alignment\n* '-index 1' - only perform the indexing and terminate\n* '-index 2' - the default behaviour, the same as when not using this option at all\n", + "example" : [ + 2 + ], + "required" : false, + "choices" : [ + 0, + 1, + 2 + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "-L", + "description" : "Indexing seed length. Default: '18'\n", + "example" : [ + 18.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--interval", + "description" : "Index every Nth L-mer in the reference database. Default: '1'\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_pos", + "description" : "Maximum number of positions to store for each unique L-mer. Set to 0 to store all positions. 
Default: '1000'\n", + "example" : [ + 1000 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Local sequence alignment tool for filtering, mapping and clustering. The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "sort", + "mRNA", + "rRNA", + "alignment", + "filtering", + "mapping", + "clustering" + ], + "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.1093/bioinformatics/bts611" + ] + }, + "links" : { + "repository" : "https://github.com/sortmerna/sortmerna", + "homepage" : "https://sortmerna.readthedocs.io/en/latest/", + "documentation" : "https://sortmerna.readthedocs.io/en/latest/manual4.0.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + 
"mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0", + "target_registry" : "images.viash-hub.com", + "target_tag" : "main", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "echo SortMeRNA: `sortmerna --version | sed -n 's/.*version \\\\([0-9]\\\\+\\\\.[0-9]\\\\+\\\\.[0-9]\\\\+\\\\).*/\\\\1/p'`\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/sortmerna/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + 
"output" : "target/nextflow/sortmerna", + "viash_version" : "0.9.0", + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" + }, + "package_config" : { + "name" : "biobox", + "version" : "main", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi ) +$( if [ ! -z ${VIASH_PAR_RIBO_DATABASE_MANIFEST+x} ]; then echo "${VIASH_PAR_RIBO_DATABASE_MANIFEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ribo_database_manifest='&'#" ; else echo "# par_ribo_database_manifest="; fi ) +$( if [ ! 
-z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OTHER+x} ]; then echo "${VIASH_PAR_OTHER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_other='&'#" ; else echo "# par_other="; fi ) +$( if [ ! -z ${VIASH_PAR_KVDB+x} ]; then echo "${VIASH_PAR_KVDB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kvdb='&'#" ; else echo "# par_kvdb="; fi ) +$( if [ ! -z ${VIASH_PAR_IDX_DIR+x} ]; then echo "${VIASH_PAR_IDX_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_idx_dir='&'#" ; else echo "# par_idx_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_READB+x} ]; then echo "${VIASH_PAR_READB}" | sed "s#'#'\\"'\\"'#g;s#.*#par_readb='&'#" ; else echo "# par_readb="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTX+x} ]; then echo "${VIASH_PAR_FASTX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastx='&'#" ; else echo "# par_fastx="; fi ) +$( if [ ! -z ${VIASH_PAR_SAM+x} ]; then echo "${VIASH_PAR_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam='&'#" ; else echo "# par_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_SQ+x} ]; then echo "${VIASH_PAR_SQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sq='&'#" ; else echo "# par_sq="; fi ) +$( if [ ! -z ${VIASH_PAR_BLAST+x} ]; then echo "${VIASH_PAR_BLAST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_blast='&'#" ; else echo "# par_blast="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_ALIGNMENTS+x} ]; then echo "${VIASH_PAR_NUM_ALIGNMENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_alignments='&'#" ; else echo "# par_num_alignments="; fi ) +$( if [ ! -z ${VIASH_PAR_MIN_LIS+x} ]; then echo "${VIASH_PAR_MIN_LIS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_lis='&'#" ; else echo "# par_min_lis="; fi ) +$( if [ ! -z ${VIASH_PAR_PRINT_ALL_READS+x} ]; then echo "${VIASH_PAR_PRINT_ALL_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_print_all_reads='&'#" ; else echo "# par_print_all_reads="; fi ) +$( if [ ! 
-z ${VIASH_PAR_PAIRED_IN+x} ]; then echo "${VIASH_PAR_PAIRED_IN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired_in='&'#" ; else echo "# par_paired_in="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED_OUT+x} ]; then echo "${VIASH_PAR_PAIRED_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired_out='&'#" ; else echo "# par_paired_out="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT2+x} ]; then echo "${VIASH_PAR_OUT2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out2='&'#" ; else echo "# par_out2="; fi ) +$( if [ ! -z ${VIASH_PAR_SOUT+x} ]; then echo "${VIASH_PAR_SOUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sout='&'#" ; else echo "# par_sout="; fi ) +$( if [ ! -z ${VIASH_PAR_ZIP_OUT+x} ]; then echo "${VIASH_PAR_ZIP_OUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip_out='&'#" ; else echo "# par_zip_out="; fi ) +$( if [ ! -z ${VIASH_PAR_MATCH+x} ]; then echo "${VIASH_PAR_MATCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_match='&'#" ; else echo "# par_match="; fi ) +$( if [ ! -z ${VIASH_PAR_MISMATCH+x} ]; then echo "${VIASH_PAR_MISMATCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mismatch='&'#" ; else echo "# par_mismatch="; fi ) +$( if [ ! -z ${VIASH_PAR_GAP_OPEN+x} ]; then echo "${VIASH_PAR_GAP_OPEN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gap_open='&'#" ; else echo "# par_gap_open="; fi ) +$( if [ ! -z ${VIASH_PAR_GAP_EXT+x} ]; then echo "${VIASH_PAR_GAP_EXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gap_ext='&'#" ; else echo "# par_gap_ext="; fi ) +$( if [ ! -z ${VIASH_PAR_N+x} ]; then echo "${VIASH_PAR_N}" | sed "s#'#'\\"'\\"'#g;s#.*#par_N='&'#" ; else echo "# par_N="; fi ) +$( if [ ! -z ${VIASH_PAR_A+x} ]; then echo "${VIASH_PAR_A}" | sed "s#'#'\\"'\\"'#g;s#.*#par_a='&'#" ; else echo "# par_a="; fi ) +$( if [ ! -z ${VIASH_PAR_E+x} ]; then echo "${VIASH_PAR_E}" | sed "s#'#'\\"'\\"'#g;s#.*#par_e='&'#" ; else echo "# par_e="; fi ) +$( if [ ! -z ${VIASH_PAR_F+x} ]; then echo "${VIASH_PAR_F}" | sed "s#'#'\\"'\\"'#g;s#.*#par_F='&'#" ; else echo "# par_F="; fi ) +$( if [ ! 
-z ${VIASH_PAR_R+x} ]; then echo "${VIASH_PAR_R}" | sed "s#'#'\\"'\\"'#g;s#.*#par_R='&'#" ; else echo "# par_R="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_ALIGNMENT+x} ]; then echo "${VIASH_PAR_NUM_ALIGNMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_alignment='&'#" ; else echo "# par_num_alignment="; fi ) +$( if [ ! -z ${VIASH_PAR_BEST+x} ]; then echo "${VIASH_PAR_BEST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_best='&'#" ; else echo "# par_best="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) +$( if [ ! -z ${VIASH_PAR_COVERAGE+x} ]; then echo "${VIASH_PAR_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_coverage='&'#" ; else echo "# par_coverage="; fi ) +$( if [ ! -z ${VIASH_PAR_DE_NOVO+x} ]; then echo "${VIASH_PAR_DE_NOVO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_de_novo='&'#" ; else echo "# par_de_novo="; fi ) +$( if [ ! -z ${VIASH_PAR_OTU_MAP+x} ]; then echo "${VIASH_PAR_OTU_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_otu_map='&'#" ; else echo "# par_otu_map="; fi ) +$( if [ ! -z ${VIASH_PAR_NUM_SEED+x} ]; then echo "${VIASH_PAR_NUM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_seed='&'#" ; else echo "# par_num_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_PASSES+x} ]; then echo "${VIASH_PAR_PASSES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_passes='&'#" ; else echo "# par_passes="; fi ) +$( if [ ! -z ${VIASH_PAR_EDGE+x} ]; then echo "${VIASH_PAR_EDGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_edge='&'#" ; else echo "# par_edge="; fi ) +$( if [ ! -z ${VIASH_PAR_FULL_SEARCH+x} ]; then echo "${VIASH_PAR_FULL_SEARCH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_full_search='&'#" ; else echo "# par_full_search="; fi ) +$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) +$( if [ ! 
-z ${VIASH_PAR_L+x} ]; then echo "${VIASH_PAR_L}" | sed "s#'#'\\"'\\"'#g;s#.*#par_L='&'#" ; else echo "# par_L="; fi ) +$( if [ ! -z ${VIASH_PAR_INTERVAL+x} ]; then echo "${VIASH_PAR_INTERVAL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interval='&'#" ; else echo "# par_interval="; fi ) +$( if [ ! -z ${VIASH_PAR_MAX_POS+x} ]; then echo "${VIASH_PAR_MAX_POS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max_pos='&'#" ; else echo "# par_max_pos="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -eo pipefail + +unset_if_false=( par_fastx par_sq par_fastx par_print_all_reads par_paired_in par_paired_out + par_F par_R par_verbose par_de_novo par_otu_map par_full_search par_out2 + par_sout par_sam par_paired ) + + +for var in "\\${unset_if_false[@]}"; do + if [ "\\${!var}" == "false" ]; then + unset \\$var + fi +done + +reads=() +IFS=";" read -ra input <<< "\\$par_input" +if [ "\\${#input[@]}" -eq 2 ]; then + reads="--reads \\${input[0]} --reads \\${input[1]}" + # set paired to true in case it's not + par_paired=true +else + reads="--reads \\${input[0]}" + par_paired=false +fi + +refs=() + +# check if references are input normally or through a manifest file +if [[ ! -z "\\$par_ribo_database_manifest" ]]; then + while IFS= read -r path || [[ -n \\$path ]]; do + refs=\\$refs" --ref \\$path" + done < \\$par_ribo_database_manifest + +elif [[ ! -z "\\$par_ref" ]]; then + IFS=";" read -ra ref <<< "\\$par_ref" + for i in "\\${ref[@]}" + do + refs+="-ref \\$i " + done + +else + echo "No reference fasta file(s) provided" + exit 1 +fi + + +sortmerna \\\\ + \\$refs \\\\ + \\$reads \\\\ + --workdir . 
\\\\ + \\${par_output:+--aligned "\\${par_output}"} \\\\ + \\${par_fastx:+--fastx} \\\\ + \\${par_other:+--other "\\${par_other}"} \\\\ + \\${par_kvdb:+--kvdb "\\${par_kvdb}"} \\\\ + \\${par_idx_dir:+--idx-dir "\\${par_idx_dir}"} \\\\ + \\${par_readb:+--readb "\\${par_readb}"} \\\\ + \\${par_sam:+--sam} \\\\ + \\${par_sq:+--sq} \\\\ + \\${par_blast:+--blast "\\${par_blast}"} \\\\ + \\${par_num_alignments:+--num_alignments "\\${par_num_alignments}"} \\\\ + \\${par_min_lis:+--min_lis "\\${par_min_lis}"} \\\\ + \\${par_print_all_reads:+--print_all_reads} \\\\ + \\${par_paired_in:+--paired_in} \\\\ + \\${par_paired_out:+--paired_out} \\\\ + \\${par_out2:+--out2} \\\\ + \\${par_sout:+--sout} \\\\ + \\${par_zip_out:+--zip-out "\\${par_zip_out}"} \\\\ + \\${par_match:+--match "\\${par_match}"} \\\\ + \\${par_mismatch:+--mismatch "\\${par_mismatch}"} \\\\ + \\${par_gap_open:+--gap_open "\\${par_gap_open}"} \\\\ + \\${par_gap_ext:+--gap_ext "\\${par_gap_ext}"} \\\\ + \\${par_N:+-N "\\${par_N}"} \\\\ + \\${par_a:+-a "\\${par_a}"} \\\\ + \\${par_e:+-e "\\${par_e}"} \\\\ + \\${par_F:+-F} \\\\ + \\${par_R:+-R} \\\\ + \\${par_num_alignment:+--num_alignment "\\${par_num_alignment}"} \\\\ + \\${par_best:+--best "\\${par_best}"} \\\\ + \\${par_verbose:+--verbose} \\\\ + \\${par_id:+--id "\\${par_id}"} \\\\ + \\${par_coverage:+--coverage "\\${par_coverage}"} \\\\ + \\${par_de_novo:+--de_novo} \\\\ + \\${par_otu_map:+--otu_map} \\\\ + \\${par_num_seed:+--num_seed "\\${par_num_seed}"} \\\\ + \\${par_passes:+--passes "\\${par_passes}"} \\\\ + \\${par_edge:+--edge "\\${par_edge}"} \\\\ + \\${par_full_search:+--full_search} \\\\ + \\${par_index:+--index "\\${par_index}"} \\\\ + \\${par_L:+-L \\$par_L} \\\\ + \\${par_interval:+--interval "\\${par_interval}"} \\\\ + \\${par_max_pos:+--max_pos "\\${par_max_pos}"} + + +if [ ! 
-z \\$par_log ]; then + mv "\\${par_output}.log" \\$par_log +fi + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678); the negation is parenthesised so a non-List value takes this branch + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + 
} + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" 
&& it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! 
+ def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/sortmerna", + "tag" : "main" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. 
+ // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow.config new file mode 100644 index 0000000..42e651b --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow.config @@ -0,0 +1,125 @@ +manifest { + name = 'sortmerna' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'main' + description = 'Local sequence alignment tool for filtering, mapping and clustering. 
The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb 
{ memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + + diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json new file mode 100644 index 0000000..d9bfdad --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/nextflow_schema.json @@ -0,0 +1,614 @@ +{ +"$schema": 
"http://json-schema.org/draft-07/schema", +"title": "sortmerna", +"description": "Local sequence alignment tool for filtering, mapping and clustering. The main \napplication of SortMeRNA is filtering rRNA from metatranscriptomic data.\n", +"type": "object", +"definitions": { + + + + "input" : { + "title": "Input", + "type": "object", + "description": "No description", + "properties": { + + + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Reads are paired-end", + "help_text": "Type: `boolean_true`, default: `false`. Reads are paired-end. If a single reads file is provided, use this option \nto indicate the file contains interleaved paired reads when neither\n\u0027paired_in\u0027 | \u0027paired_out\u0027 | \u0027out2\u0027 | \u0027sout\u0027 are specified.\n" + , + "default": "False" + } + + + , + "input": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Input fastq", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input fastq" + + } + + + , + "ref": { + "type": + "string", + "description": "Type: List of `file`, multiple_sep: `\";\"`. Reference fasta file(s) for rRNA database", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference fasta file(s) for rRNA database." + + } + + + , + "ribo_database_manifest": { + "type": + "string", + "description": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA", + "help_text": "Type: `file`. Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA." + + } + + +} +}, + + + "output" : { + "title": "Output", + "type": "object", + "description": "No description", + "properties": { + + + "log": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.log.log`, example: `$id.sortmerna.log`. 
Sortmerna log file", + "help_text": "Type: `file`, default: `$id.$key.log.log`, example: `$id.sortmerna.log`. Sortmerna log file." + , + "default": "$id.$key.log.log" + } + + + , + "output": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.output.output`. Directory and file prefix for aligned output", + "help_text": "Type: `file`, default: `$id.$key.output.output`. Directory and file prefix for aligned output. The appropriate extension: \n(fasta|fastq|blast|sam|etc) is automatically added.\nIf \u0027dir\u0027 is not specified, the output is created in the WORKDIR/out/.\nIf \u0027pfx\u0027 is not specified, the prefix \u0027aligned\u0027 is used.\n" + , + "default": "$id.$key.output.output" + } + + + , + "other": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.other.other`. Create Non-aligned reads output file with this path/prefix", + "help_text": "Type: `file`, default: `$id.$key.other.other`. Create Non-aligned reads output file with this path/prefix. Must be used with fastx." + , + "default": "$id.$key.other.other" + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "kvdb": { + "type": + "string", + "description": "Type: `string`. Path to directory of the key-value database file, used for storing the alignment results", + "help_text": "Type: `string`. Path to directory of the key-value database file, used for storing the alignment results." + + } + + + , + "idx_dir": { + "type": + "string", + "description": "Type: `string`. Path to the directory for storing the reference index files", + "help_text": "Type: `string`. Path to the directory for storing the reference index files." + + } + + + , + "readb": { + "type": + "string", + "description": "Type: `string`. Path to the directory for storing pre-processed reads", + "help_text": "Type: `string`. Path to the directory for storing pre-processed reads." 
+ + } + + + , + "fastx": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output aligned reads into FASTA/FASTQ file", + "help_text": "Type: `boolean_true`, default: `false`. Output aligned reads into FASTA/FASTQ file" + , + "default": "False" + } + + + , + "sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output SAM alignment for aligned reads", + "help_text": "Type: `boolean_true`, default: `false`. Output SAM alignment for aligned reads." + , + "default": "False" + } + + + , + "sq": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Add SQ tags to the SAM file", + "help_text": "Type: `boolean_true`, default: `false`. Add SQ tags to the SAM file" + , + "default": "False" + } + + + , + "blast": { + "type": + "string", + "description": "Type: `string`, choices: ``0`, `1`, `1 cigar`, `1 cigar qcov`, `1 cigar qcov qstrand``. Blast options:\n* \u00270\u0027 - pairwise\n* \u00271\u0027 - tabular(Blast - m 8 format)\n* \u00271 cigar\u0027 - tabular + column for CIGAR\n* \u00271 cigar qcov\u0027 - tabular + columns for CIGAR and query coverage\n* \u00271 cigar qcov qstrand\u0027 - tabular + columns for CIGAR, query coverage and strand\n", + "help_text": "Type: `string`, choices: ``0`, `1`, `1 cigar`, `1 cigar qcov`, `1 cigar qcov qstrand``. Blast options:\n* \u00270\u0027 - pairwise\n* \u00271\u0027 - tabular(Blast - m 8 format)\n* \u00271 cigar\u0027 - tabular + column for CIGAR\n* \u00271 cigar qcov\u0027 - tabular + columns for CIGAR and query coverage\n* \u00271 cigar qcov qstrand\u0027 - tabular + columns for CIGAR, query coverage and strand\n", + "enum": ["0", "1", "1 cigar", "1 cigar qcov", "1 cigar qcov qstrand"] + + + } + + + , + "num_alignments": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Report first INT alignments per read reaching E-value", + "help_text": "Type: `integer`, example: `0`. 
Report first INT alignments per read reaching E-value. If Int = 0, all alignments will be output. Default: \u00270\u0027\n" + + } + + + , + "min_lis": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. search all alignments having the first INT longest LIS", + "help_text": "Type: `integer`, example: `2`. search all alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is\ncomputed using seeds\u0027 positions to expand hits into longer matches prior to Smith-Waterman alignment. Default: \u00272\u0027.\n" + + } + + + , + "print_all_reads": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files", + "help_text": "Type: `boolean_true`, default: `false`. output null alignment strings for non-aligned reads to SAM and/or BLAST tabular files." + , + "default": "False" + } + + + , + "paired_in": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file", + "help_text": "Type: `boolean_true`, default: `false`. In the case where a pair of reads is aligned with a score above the threshold, the output of the reads is controlled\nby the following options:\n* --paired_in and --paired_out are both false: Only one read per pair is output to the aligned fasta file.\n* --paired_in is true and --paired_out is false: Both reads of the pair are output to the aligned fasta file.\n* --paired_in is false and --paired_out is true: Both reads are output to the other fasta file (if it is specified).\n" + , + "default": "False" + } + + + , + "paired_out": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
See description of --paired_in", + "help_text": "Type: `boolean_true`, default: `false`. See description of --paired_in." + , + "default": "False" + } + + + , + "out2": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output paired reads into separate files", + "help_text": "Type: `boolean_true`, default: `false`. Output paired reads into separate files. Must be used with \u0027--fastx\u0027. If a single reads file is provided, this option\nimplies interleaved paired reads. When used with \u0027sout\u0027, four (4) output files for aligned reads will be generated:\n\u0027aligned-paired-fwd, aligned-paired-rev, aligned-singleton-fwd, aligned-singleton-rev\u0027. If \u0027other\u0027 option is also used,\neight (8) output files will be generated.\n" + , + "default": "False" + } + + + , + "sout": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Separate paired and singleton aligned reads", + "help_text": "Type: `boolean_true`, default: `false`. Separate paired and singleton aligned reads. Must be used with \u0027--fastx\u0027. If a single reads file is provided,\nthis option implies interleaved paired reads. Cannot be used with \u0027--paired_in\u0027 or \u0027--paired_out\u0027.\n" + , + "default": "False" + } + + + , + "zip_out": { + "type": + "string", + "description": "Type: `string`, example: `-1`, choices: ``1`, `true`, `t`, `yes`, `y`, `0`, `false`, `f`, `no`, `n`, `-1``. Compress the output files", + "help_text": "Type: `string`, example: `-1`, choices: ``1`, `true`, `t`, `yes`, `y`, `0`, `false`, `f`, `no`, `n`, `-1``. Compress the output files. 
The possible values are: \n* \u00271/true/t/yes/y\u0027\n* \u00270/false/f/no/n\u0027\n*\u0027-1\u0027 (the same format as input - default)\nThe values are Not case sensitive.\n", + "enum": ["1", "true", "t", "yes", "y", "0", "false", "f", "no", "n", "-1"] + + + } + + + , + "match": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Smith-Waterman score for a match (positive integer)", + "help_text": "Type: `integer`, example: `2`. Smith-Waterman score for a match (positive integer). Default: \u00272\u0027.\n" + + } + + + , + "mismatch": { + "type": + "integer", + "description": "Type: `integer`, example: `-3`. Smith-Waterman penalty for a mismatch (negative integer)", + "help_text": "Type: `integer`, example: `-3`. Smith-Waterman penalty for a mismatch (negative integer). Default: \u0027-3\u0027.\n" + + } + + + , + "gap_open": { + "type": + "integer", + "description": "Type: `integer`, example: `5`. Smith-Waterman penalty for introducing a gap (positive integer)", + "help_text": "Type: `integer`, example: `5`. Smith-Waterman penalty for introducing a gap (positive integer). Default: \u00275\u0027.\n" + + } + + + , + "gap_ext": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. Smith-Waterman penalty for extending a gap (positive integer)", + "help_text": "Type: `integer`, example: `2`. Smith-Waterman penalty for extending a gap (positive integer). Default: \u00272\u0027.\n" + + } + + + , + "N": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Smith-Waterman penalty for ambiguous letters (N\u0027s) scored as --mismatch", + "help_text": "Type: `integer`, example: `-1`. Smith-Waterman penalty for ambiguous letters (N\u0027s) scored as --mismatch. Default: \u0027-1\u0027.\n" + + } + + + , + "a": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Number of threads to use", + "help_text": "Type: `integer`, example: `1`. Number of threads to use. 
Default: \u00271\u0027.\n" + + } + + + , + "e": { + "type": + "number", + "description": "Type: `double`, example: `1.0`. E-value threshold", + "help_text": "Type: `double`, example: `1.0`. E-value threshold. Default: \u00271\u0027.\n" + + } + + + , + "F": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search only the forward strand", + "help_text": "Type: `boolean_true`, default: `false`. Search only the forward strand." + , + "default": "False" + } + + + , + "R": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search only the reverse-complementary strand", + "help_text": "Type: `boolean_true`, default: `false`. Search only the reverse-complementary strand." + , + "default": "False" + } + + + , + "num_alignment": { + "type": + "integer", + "description": "Type: `integer`, example: `-1`. Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output)", + "help_text": "Type: `integer`, example: `-1`. Report first INT alignments per read reaching E-value (--num_alignments 0 signifies all alignments will be output).\nDefault: \u0027-1\u0027\n" + + } + + + , + "best": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: \u00271\u0027", + "help_text": "Type: `integer`, example: `1`. Report INT best alignments per read reaching E-value by searching --min_lis INT candidate alignments (--best 0\nsignifies all candidate alignments will be searched) Default: \u00271\u0027.\n" + + } + + + , + "verbose": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Verbose output", + "help_text": "Type: `boolean_true`, default: `false`. Verbose output." 
+ , + "default": "False" + } + + +} +}, + + + "otu picking options" : { + "title": "OTU picking options", + "type": "object", + "description": "No description", + "properties": { + + + "id": { + "type": + "number", + "description": "Type: `double`, example: `0.97`. %id similarity threshold (the alignment must still pass the E-value threshold)", + "help_text": "Type: `double`, example: `0.97`. %id similarity threshold (the alignment must still pass the E-value threshold). Default: \u00270.97\u0027.\n" + + } + + + , + "coverage": { + "type": + "number", + "description": "Type: `double`, example: `0.97`. %query coverage threshold (the alignment must still pass the E-value threshold)", + "help_text": "Type: `double`, example: `0.97`. %query coverage threshold (the alignment must still pass the E-value threshold). Default: \u00270.97\u0027.\n" + + } + + + , + "de_novo": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. FASTA/FASTQ file for reads matching database \u003c %id off (set using --id) and \u003c %cov (set using --coverage)\n(alignment must still pass the E-value threshold)", + "help_text": "Type: `boolean_true`, default: `false`. FASTA/FASTQ file for reads matching database \u003c %id off (set using --id) and \u003c %cov (set using --coverage)\n(alignment must still pass the E-value threshold).\n" + , + "default": "False" + } + + + , + "otu_map": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Output OTU map (input to QIIME\u0027s make_otu_table", + "help_text": "Type: `boolean_true`, default: `false`. Output OTU map (input to QIIME\u0027s make_otu_table.py).\n" + , + "default": "False" + } + + +} +}, + + + "advanced options" : { + "title": "Advanced options", + "type": "object", + "description": "No description", + "properties": { + + + "num_seed": { + "type": + "integer", + "description": "Type: `integer`, example: `2`. 
Number of seeds matched before searching for candidate LIS", + "help_text": "Type: `integer`, example: `2`. Number of seeds matched before searching for candidate LIS. Default: \u00272\u0027.\n" + + } + + + , + "passes": { + "type": + "string", + "description": "Type: List of `integer`, multiple_sep: `\";\"`. Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ", + "help_text": "Type: List of `integer`, multiple_sep: `\";\"`. Three intervals at which to place the seed on the read L,L/2,3 (L is the seed length set in ./indexdb_rna).\n" + + } + + + , + "edge": { + "type": + "string", + "description": "Type: `string`, example: `4`. The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment", + "help_text": "Type: `string`, example: `4`. The number (or percentage if followed by %) of nucleotides to add to each edge of the alignment region on the\nreference sequence before performing Smith-Waterman alignment. Default: \u00274\u0027.\n" + + } + + + , + "full_search": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(\u003c1% gain in sensitivity with up four-fold decrease in speed)", + "help_text": "Type: `boolean_true`, default: `false`. Search for all 0-error and 1-error seed off matches in the index rather than stopping after finding a 0-error match\n(\u003c1% gain in sensitivity with up four-fold decrease in speed).\n" + , + "default": "False" + } + + +} +}, + + + "indexing options" : { + "title": "Indexing Options", + "type": "object", + "description": "No description", + "properties": { + + + "index": { + "type": + "integer", + "description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2``. 
Create index files for the reference database", + "help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2``. Create index files for the reference database. By default when this option is not used, the program checks the\nreference index and builds it if not already existing.\nThis can be changed by using \u0027-index\u0027 as follows:\n* \u0027-index 0\u0027 - skip indexing. If the index does not exist, the program will terminate\n and warn to build the index prior performing the alignment\n* \u0027-index 1\u0027 - only perform the indexing and terminate\n* \u0027-index 2\u0027 - the default behaviour, the same as when not using this option at all\n", + "enum": [0, 1, 2] + + + } + + + , + "-L": { + "type": + "number", + "description": "Type: `double`, example: `18.0`. Indexing seed length", + "help_text": "Type: `double`, example: `18.0`. Indexing seed length. Default: \u002718\u0027\n" + + } + + + , + "interval": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. Index every Nth L-mer in the reference database", + "help_text": "Type: `integer`, example: `1`. Index every Nth L-mer in the reference database. Default: \u00271\u0027\n" + + } + + + , + "max_pos": { + "type": + "integer", + "description": "Type: `integer`, example: `1000`. Maximum number of positions to store for each unique L-mer", + "help_text": "Type: `integer`, example: `1000`. Maximum number of positions to store for each unique L-mer. Set to 0 to store all positions. Default: \u00271000\u0027\n" + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. 
Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/input" + }, + + { + "$ref": "#/definitions/output" + }, + + { + "$ref": "#/definitions/options" + }, + + { + "$ref": "#/definitions/otu picking options" + }, + + { + "$ref": "#/definitions/advanced options" + }, + + { + "$ref": "#/definitions/indexing options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/nextflow/trimgalore/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml similarity index 88% rename from target/nextflow/trimgalore/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml index b194bd3..f2b1494 100644 --- a/target/nextflow/trimgalore/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/.config.vsh.yaml @@ -1,5 +1,19 @@ name: "trimgalore" version: "main" +authors: +- name: "Sai Nirmayi Yasa" + roles: + - "author" + - "maintainer" + info: + links: + email: "nirmayi@data-intuitive.com" + github: "sainirmayi" + linkedin: "sai-nirmayi-yasa" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Junior Bioinformatics Researcher" argument_groups: - name: "Input" arguments: @@ -36,42 +50,79 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--phred33" description: "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores\ \ (Sanger/Illumina 1.9+ encoding) for quality trimming." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--phred64" description: "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores\ \ (Illumina 1.5 encoding) for quality trimming." 
info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--fastqc" description: "Run FastQC in the default mode on the FastQ file once trimming is\ \ complete." info: null + direction: "input" + - type: "string" + name: "--fastqc_args" + description: "Passes extra arguments (excluding files) to FastQC. If more than\ + \ one argument is to be passed to FastQC they must be in the form \"arg1 arg2\ + \ ...\". Passing extra arguments will automatically invoke FastQC, so --fastqc\ + \ does not have to be specified separately." + info: null + example: + - "--nogroup --noextract" required: false direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--fastqc_args" - description: "Passes extra arguments to FastQC. If more than one argument is to\ - \ be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra\ - \ arguments will automatically invoke FastQC, so --fastqc does not have to be\ - \ specified separately." + - type: "file" + name: "--fastqc_contaminants" + description: "Specifies a non-default file which contains the list of contaminants\ + \ for FastQC to screen overrepresented sequences against. The file must contain\ + \ sets of named contaminants in the form name[tab]sequence. Lines prefixed with\ + \ a hash will be ignored." info: null example: - - "--nogroup --outdir /home/" + - "contaminants.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_adapters" + description: "Specifies a non-default file which contains the list of adapter\ + \ sequences which FastQC will explicitly search against the library. The\ + \ file must contain sets of named adapters in the form name[tab]sequence. Lines\ + \ prefixed with a hash will be ignored." 
+ info: null + example: + - "adapters.txt" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--fastqc_limits" + description: "Specifies a non-default file which contains a set of criteria which\ + \ FastQC will use to determine the warn/error limits for the various modules.\ + \ This file can also be used to selectively remove some modules from the output\ + \ all together. The format needs to mirror the default limits.txt file found\ + \ in the Configuration folder." + info: null + example: + - "limits.txt" + must_exist: true + create_parent: true required: false direction: "input" multiple: false @@ -88,7 +139,7 @@ argument_groups: \ -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor\ \ so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\ \nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters.\ - \ \n example: 20\n" + \ \n" info: null example: - "AGCTCCCG" @@ -112,36 +163,27 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--illumina" description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ \ universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of\ \ adapter sequence." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--stranded_illumina" description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ \ stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default\ \ auto-detection of adapter sequence." 
info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--nextera" description: "Adapter sequence to be trimmed is the first 12bp of the Nextera\ \ adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--small_rna" description: "Adapter sequence to be trimmed is the first 12bp of the Illumina\ \ Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection\ @@ -149,10 +191,7 @@ argument_groups: \ --length value to 18bp. If the smallRNA libraries are paired-end then a automatically\ \ (GATCGTCGGACT) unless -a 2 had been defined explicitly." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--consider_already_trimmed" description: "During adapter auto-detection, the limit set by this argument allows\ @@ -199,25 +238,19 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--gzip" description: "Compress the output file with GZIP. If the input files are GZIP-compressed\ \ the output files will automatically be GZIP compressed as well. As of v0.2.8\ \ the compression will take place on the fly." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--dont_gzip" description: "Output files won't be compressed with GZIP. This option overrides\ \ --gzip." 
info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--length" description: "Discard reads that became shorter than the specified length because\ @@ -245,31 +278,22 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--trim_n" description: "Removes Ns from either side of the read. This option does currently\ \ not work in RRBS mode." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--no_report_file" description: "If specified no report file will be generated." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--suppress_warn" description: "If specified any output to STDOUT or STDERR will be suppressed." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--clip_R1" description: "Instructs TrimGalore to remove given number of bp from the 5' end\ @@ -339,18 +363,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" - name: "--cores" - alternatives: - - "-j" - description: "Number of cores to be used for trimming" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Specific trimming options without adapter/quality trimming" arguments: - type: "integer" @@ -375,16 +387,13 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--clock" description: "In this mode, reads are trimmed in a specific way that is currently\ \ used for the Mouse Epigenetic Clock." 
info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--polyA" description: "This is a new, still experimental, trimming mode to identify and\ \ remove poly-A tails from sequences. When --polyA is selected, Trim Galore\ @@ -403,11 +412,8 @@ argument_groups: \ for Poly-A tails, and it is the user's responsibility to carry out an initial\ \ round of trimming." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--implicon" description: "This is a special mode of operation for paired-end data, such as\ \ required for the IMPLICON method, where a UMI sequence is getting transferred\ @@ -419,13 +425,10 @@ argument_groups: \ fragment to be sequenced. The UMI of Read 2 (R2) is written into the read\ \ ID of both reads and removed from the actual sequence.\n" info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - name: "RRBS-specific options" arguments: - - type: "boolean" + - type: "boolean_true" name: "--rrbs" description: "Specifies that the input file was an MspI digested RRBS sample (recognition\ \ site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed\ @@ -437,11 +440,8 @@ argument_groups: \ fragments. This option is not recommended for users of the Tecan Ovation RRBS\ \ Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--non_directional" description: "Selecting this option for non-directional RRBS libraries will screen\ \ quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and,\ @@ -450,21 +450,15 @@ argument_groups: \ step. '--non_directional' requires '--rrbs' to be specified as well. Note\ \ that this option does not set '--clip_r2 2' in paired-end mode." 
info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--keep" description: "Keep the quality trimmed intermediate file." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - name: "Paired-end specific options" arguments: - - type: "boolean" + - type: "boolean_true" name: "--paired" description: "This option performs length trimming of quality/adapter/RRBS trimmed\ \ reads for paired-end files. To pass the validation test, both sequences of\ @@ -476,21 +470,15 @@ argument_groups: \ paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq\ \ SRR2_1.fq.gz SRR2_2.fq.gz ... ." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" + - type: "boolean_true" name: "--retain_unpaired" description: "If only one of the two paired-end reads became too short, the longer\ \ read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output\ \ files. The length cutoff for unpaired single-end reads is governed by the\ \ parameters -r1/--length_1 and -r2/--length_2." info: null - required: false direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--length_1" alternatives: @@ -530,7 +518,7 @@ argument_groups: - "trimmed_output" must_exist: true create_parent: true - required: false + required: true direction: "output" multiple: false multiple_sep: ";" @@ -540,7 +528,7 @@ argument_groups: \ files (paired-end) are specified, but not for longer lists." info: null example: - - "read_1.fastq.gz" + - "read_1.fastq" must_exist: true create_parent: true required: false @@ -553,7 +541,7 @@ argument_groups: \ files (paired-end) are specified, but not for longer lists." 
info: null example: - - "read_2.fastq.gz" + - "read_2.fastq" must_exist: true create_parent: true required: false @@ -683,19 +671,13 @@ status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" keywords: - "trimming" - "adapters" license: "GPL-3.0" +references: + doi: + - "10.5281/zenodo.7598955" links: repository: "https://github.com/FelixKrueger/TrimGalore" homepage: "https://github.com/FelixKrueger/TrimGalore" @@ -768,7 +750,7 @@ runners: engines: - type: "docker" id: "docker" - image: "quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0" + image: "quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" @@ -788,31 +770,28 @@ build_info: output: "target/nextflow/trimgalore" executable: "target/nextflow/trimgalore/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" 
}" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/trimgalore/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf similarity index 96% rename from target/nextflow/trimgalore/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf index d28c34a..f9b9b84 100644 --- a/target/nextflow/trimgalore/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf @@ -8,6 +8,9 @@ // authors of this component should specify the license in the header of such // files, or include a separate license file detailing the licenses of all included // files. +// +// Component authors: +// * Sai Nirmayi Yasa (author, maintainer) //////////////////////////// // VDSL3 helper functions // @@ -2806,6 +2809,29 @@ meta = [ "config": processConfig(readJsonBlob('''{ "name" : "trimgalore", "version" : "main", + "authors" : [ + { + "name" : "Sai Nirmayi Yasa", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "nirmayi@data-intuitive.com", + "github" : "sainirmayi", + "linkedin" : "sai-nirmayi-yasa" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Junior Bioinformatics Researcher" + } + ] + } + } + ], "argument_groups" : [ { "name" : "Input", @@ -2845,51 +2871,84 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--phred33", "description" : "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" 
+ "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--phred64", "description" : "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--fastqc", "description" : "Run FastQC in the default mode on the FastQ file once trimming is complete.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { "type" : "string", "name" : "--fastqc_args", - "description" : "Passes extra arguments to FastQC. If more than one argument is to be passed to FastQC they must be in the form \\"arg1 arg2 ...\\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.", + "description" : "Passes extra arguments (excluding files) to FastQC. If more than one argument is to be passed to FastQC they must be in the form \\"arg1 arg2 ...\\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.", "example" : [ - "--nogroup --outdir /home/" + "--nogroup --noextract" ], "required" : false, "direction" : "input", "multiple" : false, "multiple_sep" : ";" }, + { + "type" : "file", + "name" : "--fastqc_contaminants", + "description" : "Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against. The file must contain sets of named contaminants in the form name[tab]sequence. 
Lines prefixed with a hash will be ignored.", + "example" : [ + "contaminants.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_adapters", + "description" : "Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library. The file must contain sets of named adapters in the form name[tab]sequence. Lines prefixed with a hash will be ignored.", + "example" : [ + "adapters.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--fastqc_limits", + "description" : "Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules. This file can also be used to selectively remove some modules from the output all together. The format needs to mirror the default limits.txt file found in the Configuration folder.", + "example" : [ + "limits.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, { "type" : "string", "name" : "--adapter", "alternatives" : [ "-a" ], - "description" : "Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. 
\nAt a special request, multiple adapters can also be specified like so: \n -a \\" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\\" -a2 \\" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\\", \nor so:\n -a \\"file:../multiple_adapters.fa\\" -a2 \\"file:../different_adapters.fa\\"\nPotentially in conjucntion with the parameter \\"-n 3\\" to trim all adapters. \n example: 20\n", + "description" : "Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request, multiple adapters can also be specified like so: \n -a \\" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\\" -a2 \\" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\\", \nor so:\n -a \\"file:../multiple_adapters.fa\\" -a2 \\"file:../different_adapters.fa\\"\nPotentially in conjucntion with the parameter \\"-n 3\\" to trim all adapters. 
\n", "example" : [ "AGCTCCCG" ], @@ -2914,40 +2973,28 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--illumina", "description" : "Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--stranded_illumina", "description" : "Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--nextera", "description" : "Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--small_rna", "description" : "Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { "type" : "integer", @@ -2995,22 +3042,16 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--gzip", "description" : "Compress the output file with GZIP. 
If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--dont_gzip", "description" : "Output files won't be compressed with GZIP. This option overrides --gzip.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { "type" : "integer", @@ -3034,31 +3075,22 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--trim_n", "description" : "Removes Ns from either side of the read. This option does currently not work in RRBS mode.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--no_report_file", "description" : "If specified no report file will be generated.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--suppress_warn", "description" : "If specified any output to STDOUT or STDERR will be suppressed.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { "type" : "integer", @@ -3113,21 +3145,6 @@ meta = [ "direction" : "input", "multiple" : false, "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--cores", - "alternatives" : [ - "-j" - ], - "description" : "Number of cores to be used for trimming", - "example" : [ - 1 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" } ] }, @@ -3153,31 +3170,22 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : 
"--clock", "description" : "In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--polyA", "description" : "This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start (\\"32:A:\\") and end (\\"_PolyA:32\\") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--implicon", "description" : "This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. 
In it's current implementation, the UMI carrying reads come in the following format\n Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'\n Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'\nWhere UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.\n", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" } ] }, @@ -3185,31 +3193,22 @@ meta = [ "name" : "RRBS-specific options", "arguments" : [ { - "type" : "boolean", + "type" : "boolean_true", "name" : "--rrbs", "description" : "Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below).", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--non_directional", "description" : "Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. 
Note that this option does not set '--clip_r2 2' in paired-end mode.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--keep", "description" : "Keep the quality trimmed intermediate file.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" } ] }, @@ -3217,22 +3216,16 @@ meta = [ "name" : "Paired-end specific options", "arguments" : [ { - "type" : "boolean", + "type" : "boolean_true", "name" : "--paired", "description" : "This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { - "type" : "boolean", + "type" : "boolean_true", "name" : "--retain_unpaired", "description" : "If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. 
The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" + "direction" : "input" }, { "type" : "integer", @@ -3281,7 +3274,7 @@ meta = [ ], "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" @@ -3291,7 +3284,7 @@ meta = [ "name" : "--trimmed_r1", "description" : "Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", "example" : [ - "read_1.fastq.gz" + "read_1.fastq" ], "must_exist" : true, "create_parent" : true, @@ -3305,7 +3298,7 @@ meta = [ "name" : "--trimmed_r2", "description" : "Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.", "example" : [ - "read_2.fastq.gz" + "read_2.fastq" ], "must_exist" : true, "create_parent" : true, @@ -3450,25 +3443,16 @@ meta = [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], "keywords" : [ "trimming", "adapters" ], "license" : "GPL-3.0", + "references" : { + "doi" : [ + "10.5281/zenodo.7598955" + ] + }, "links" : { "repository" : "https://github.com/FelixKrueger/TrimGalore", "homepage" : "https://github.com/FelixKrueger/TrimGalore", @@ -3552,7 +3536,7 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0", + "image" : "quay.io/biocontainers/trim-galore:0.6.10--hdfd78af_0", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", @@ -3574,46 +3558,36 @@ meta = [ "config" : "/workdir/root/repo/src/trimgalore/config.vsh.yaml", "runner" : "nextflow", "engine" : 
"docker|native", - "output" : "/workdir/root/repo/target/nextflow/trimgalore", + "output" : "target/nextflow/trimgalore", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3635,6 +3609,9 @@ $( if [ ! -z ${VIASH_PAR_PHRED33+x} ]; then echo "${VIASH_PAR_PHRED33}" | sed "s $( if [ ! 
-z ${VIASH_PAR_PHRED64+x} ]; then echo "${VIASH_PAR_PHRED64}" | sed "s#'#'\\"'\\"'#g;s#.*#par_phred64='&'#" ; else echo "# par_phred64="; fi ) $( if [ ! -z ${VIASH_PAR_FASTQC+x} ]; then echo "${VIASH_PAR_FASTQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc='&'#" ; else echo "# par_fastqc="; fi ) $( if [ ! -z ${VIASH_PAR_FASTQC_ARGS+x} ]; then echo "${VIASH_PAR_FASTQC_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_args='&'#" ; else echo "# par_fastqc_args="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_CONTAMINANTS+x} ]; then echo "${VIASH_PAR_FASTQC_CONTAMINANTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_contaminants='&'#" ; else echo "# par_fastqc_contaminants="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_ADAPTERS+x} ]; then echo "${VIASH_PAR_FASTQC_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_adapters='&'#" ; else echo "# par_fastqc_adapters="; fi ) +$( if [ ! -z ${VIASH_PAR_FASTQC_LIMITS+x} ]; then echo "${VIASH_PAR_FASTQC_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_limits='&'#" ; else echo "# par_fastqc_limits="; fi ) $( if [ ! -z ${VIASH_PAR_ADAPTER+x} ]; then echo "${VIASH_PAR_ADAPTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter='&'#" ; else echo "# par_adapter="; fi ) $( if [ ! -z ${VIASH_PAR_ADAPTER2+x} ]; then echo "${VIASH_PAR_ADAPTER2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapter2='&'#" ; else echo "# par_adapter2="; fi ) $( if [ ! -z ${VIASH_PAR_ILLUMINA+x} ]; then echo "${VIASH_PAR_ILLUMINA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_illumina='&'#" ; else echo "# par_illumina="; fi ) @@ -3658,7 +3635,6 @@ $( if [ ! -z ${VIASH_PAR_THREE_PRIME_CLIP_R1+x} ]; then echo "${VIASH_PAR_THREE_ $( if [ ! -z ${VIASH_PAR_THREE_PRIME_CLIP_R2+x} ]; then echo "${VIASH_PAR_THREE_PRIME_CLIP_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime_clip_R2='&'#" ; else echo "# par_three_prime_clip_R2="; fi ) $( if [ ! -z ${VIASH_PAR_NEXTSEQ+x} ]; then echo "${VIASH_PAR_NEXTSEQ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nextseq='&'#" ; else echo "# par_nextseq="; fi ) $( if [ ! 
-z ${VIASH_PAR_BASENAME+x} ]; then echo "${VIASH_PAR_BASENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_basename='&'#" ; else echo "# par_basename="; fi ) -$( if [ ! -z ${VIASH_PAR_CORES+x} ]; then echo "${VIASH_PAR_CORES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cores='&'#" ; else echo "# par_cores="; fi ) $( if [ ! -z ${VIASH_PAR_HARDTRIM5+x} ]; then echo "${VIASH_PAR_HARDTRIM5}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hardtrim5='&'#" ; else echo "# par_hardtrim5="; fi ) $( if [ ! -z ${VIASH_PAR_HARDTRIM3+x} ]; then echo "${VIASH_PAR_HARDTRIM3}" | sed "s#'#'\\"'\\"'#g;s#.*#par_hardtrim3='&'#" ; else echo "# par_hardtrim3="; fi ) $( if [ ! -z ${VIASH_PAR_CLOCK+x} ]; then echo "${VIASH_PAR_CLOCK}" | sed "s#'#'\\"'\\"'#g;s#.*#par_clock='&'#" ; else echo "# par_clock="; fi ) @@ -3720,13 +3696,16 @@ unset_if_false=( par_small_rna par_gzip par_dont_gzip + par_trim_n par_no_report_file par_suppress_warn par_clock par_polyA + par_implicon par_rrbs par_non_directional - par_keep par_paired + par_keep + par_paired par_retain_unpaired ) @@ -3735,12 +3714,24 @@ for par in \\${unset_if_false[@]}; do [[ "\\$test_val" == "false" ]] && unset \\$par done +# Add FastQC file arguments to fastqc_args +fastqc_args="\\${par_fastqc_args}" +if [ -f "\\$par_fastqc_contaminants" ]; then + fastqc_args+=" --contaminants \\$par_fastqc_contaminants" +fi +if [ -f "\\$par_fastqc_adapters" ]; then + fastqc_args+=" --adapters \\$par_fastqc_adapters" +fi +if [ -f "\\$par_fastqc_limits" ]; then + fastqc_args+=" --limits \\$par_fastqc_limits" +fi + trim_galore \\\\ \\${par_quality:+-q "\\${par_quality}"} \\\\ \\${par_phred33:+--phred33} \\\\ \\${par_phred64:+--phred64 } \\\\ \\${par_fastqc:+--fastqc } \\\\ - \\${par_fastqc_args:+--fastqc_args "\\${par_fastqc_args}"} \\\\ + \\${fastqc_args:+--fastqc_args "\\${fastqc_args}"} \\\\ \\${par_adapter:+-a "\\${par_adapter}"} \\\\ \\${par_adapter2:+-a2 "\\${par_adapter2}"} \\\\ \\${par_illumina:+--illumina} \\\\ @@ -3776,7 +3767,7 @@ trim_galore \\\\ 
\\${par_retain_unpaired:+--retain_unpaired} \\\\ \\${par_length_1:+-r1 "\\${par_length_1}"} \\\\ \\${par_length_2:+-r2 "\\${par_length_2}"} \\\\ - \\${par_cores:+-j "\\${par_cores}"} \\\\ + \\${meta_cpus:+-j "\\${meta_cpus}"} \\\\ -o \\$par_output_dir \\\\ \\${input[*]} @@ -4173,7 +4164,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/trimgalore", + "image" : "vsh/biobox/trimgalore", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/trimgalore/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow.config similarity index 99% rename from target/nextflow/trimgalore/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow.config index 9a0f811..04ac43b 100644 --- a/target/nextflow/trimgalore/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow.config @@ -4,6 +4,7 @@ manifest { nextflowVersion = '!>=20.12.1-edge' version = 'main' description = 'A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files. \n' + author = 'Sai Nirmayi Yasa' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/trimgalore/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json similarity index 61% rename from target/nextflow/trimgalore/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json index 76bc6ff..9933493 100644 --- a/target/nextflow/trimgalore/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/nextflow_schema.json @@ -47,9 +47,10 @@ "phred33": { "type": "boolean", - "description": "Type: `boolean`. Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1", - "help_text": "Type: `boolean`. 
Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming." - + "description": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1", + "help_text": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming." + , + "default": "False" } @@ -57,9 +58,10 @@ "phred64": { "type": "boolean", - "description": "Type: `boolean`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1", - "help_text": "Type: `boolean`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming." - + "description": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1", + "help_text": "Type: `boolean_true`, default: `false`. Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming." + , + "default": "False" } @@ -67,9 +69,10 @@ "fastqc": { "type": "boolean", - "description": "Type: `boolean`. Run FastQC in the default mode on the FastQ file once trimming is complete", - "help_text": "Type: `boolean`. Run FastQC in the default mode on the FastQ file once trimming is complete." - + "description": "Type: `boolean_true`, default: `false`. Run FastQC in the default mode on the FastQ file once trimming is complete", + "help_text": "Type: `boolean_true`, default: `false`. Run FastQC in the default mode on the FastQ file once trimming is complete." + , + "default": "False" } @@ -77,8 +80,38 @@ "fastqc_args": { "type": "string", - "description": "Type: `string`, example: `--nogroup --outdir /home/`. Passes extra arguments to FastQC", - "help_text": "Type: `string`, example: `--nogroup --outdir /home/`. Passes extra arguments to FastQC. 
If more than one argument is to be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately." + "description": "Type: `string`, example: `--nogroup --noextract`. Passes extra arguments (excluding files) to FastQC", + "help_text": "Type: `string`, example: `--nogroup --noextract`. Passes extra arguments (excluding files) to FastQC. If more than one argument is to be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately." + + } + + + , + "fastqc_contaminants": { + "type": + "string", + "description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against", + "help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list of contaminants for FastQC to screen overrepresented sequences against. The file must contain sets of named contaminants in the form name[tab]sequence. Lines prefixed with a hash will be ignored." + + } + + + , + "fastqc_adapters": { + "type": + "string", + "description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library", + "help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of adapter sequences which which FasstQC will explicity search against the library. The file must contain sets of named adapters in the form name[tab]sequence. Lines prefixed with a hash will be ignored." + + } + + + , + "fastqc_limits": { + "type": + "string", + "description": "Type: `file`, example: `limits.txt`. 
Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules", + "help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains a set of criteria which FastQC will use to determine the warn/error limits for the various modules. This file can also be used to selectively remove some modules from the output all together. The format needs to mirror the default limits.txt file found in the Configuration folder." } @@ -88,7 +121,7 @@ "type": "string", "description": "Type: `string`, example: `AGCTCCCG`. Adapter sequence to be trimmed", - "help_text": "Type: `string`, example: `AGCTCCCG`. Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. \nAt a special request, multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters. \n example: 20\n" + "help_text": "Type: `string`, example: `AGCTCCCG`. Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. 
\nAt a special request, multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters. \n" } @@ -107,9 +140,10 @@ "illumina": { "type": "boolean", - "description": "Type: `boolean`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence", - "help_text": "Type: `boolean`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence." - + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter \u0027AGATCGGAAGAGC\u0027 instead of the default auto-detection of adapter sequence." + , + "default": "False" } @@ -117,9 +151,10 @@ "stranded_illumina": { "type": "boolean", - "description": "Type: `boolean`. Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", - "help_text": "Type: `boolean`. Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." - + "description": "Type: `boolean_true`, default: `false`. 
Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter \u0027ACTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." + , + "default": "False" } @@ -127,9 +162,10 @@ "nextera": { "type": "boolean", - "description": "Type: `boolean`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", - "help_text": "Type: `boolean`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." - + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Nextera adapter \u0027CTGTCTCTTATA\u0027 instead of the default auto-detection of adapter sequence." + , + "default": "False" } @@ -137,9 +173,10 @@ "small_rna": { "type": "boolean", - "description": "Type: `boolean`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence", - "help_text": "Type: `boolean`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. 
If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly." - + "description": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence", + "help_text": "Type: `boolean_true`, default: `false`. Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3\u0027 Adapter \u0027TGGAATTCTCGG\u0027 instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly." + , + "default": "False" } @@ -187,9 +224,10 @@ "gzip": { "type": "boolean", - "description": "Type: `boolean`. Compress the output file with GZIP", - "help_text": "Type: `boolean`. Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly." - + "description": "Type: `boolean_true`, default: `false`. Compress the output file with GZIP", + "help_text": "Type: `boolean_true`, default: `false`. Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly." + , + "default": "False" } @@ -197,9 +235,10 @@ "dont_gzip": { "type": "boolean", - "description": "Type: `boolean`. Output files won\u0027t be compressed with GZIP", - "help_text": "Type: `boolean`. Output files won\u0027t be compressed with GZIP. This option overrides --gzip." - + "description": "Type: `boolean_true`, default: `false`. Output files won\u0027t be compressed with GZIP", + "help_text": "Type: `boolean_true`, default: `false`. 
Output files won\u0027t be compressed with GZIP. This option overrides --gzip." + , + "default": "False" } @@ -227,9 +266,10 @@ "trim_n": { "type": "boolean", - "description": "Type: `boolean`. Removes Ns from either side of the read", - "help_text": "Type: `boolean`. Removes Ns from either side of the read. This option does currently not work in RRBS mode." - + "description": "Type: `boolean_true`, default: `false`. Removes Ns from either side of the read", + "help_text": "Type: `boolean_true`, default: `false`. Removes Ns from either side of the read. This option does currently not work in RRBS mode." + , + "default": "False" } @@ -237,9 +277,10 @@ "no_report_file": { "type": "boolean", - "description": "Type: `boolean`. If specified no report file will be generated", - "help_text": "Type: `boolean`. If specified no report file will be generated." - + "description": "Type: `boolean_true`, default: `false`. If specified no report file will be generated", + "help_text": "Type: `boolean_true`, default: `false`. If specified no report file will be generated." + , + "default": "False" } @@ -247,9 +288,10 @@ "suppress_warn": { "type": "boolean", - "description": "Type: `boolean`. If specified any output to STDOUT or STDERR will be suppressed", - "help_text": "Type: `boolean`. If specified any output to STDOUT or STDERR will be suppressed." - + "description": "Type: `boolean_true`, default: `false`. If specified any output to STDOUT or STDERR will be suppressed", + "help_text": "Type: `boolean_true`, default: `false`. If specified any output to STDOUT or STDERR will be suppressed." + , + "default": "False" } @@ -313,16 +355,6 @@ } - , - "cores": { - "type": - "integer", - "description": "Type: `integer`, example: `1`. Number of cores to be used for trimming", - "help_text": "Type: `integer`, example: `1`. Number of cores to be used for trimming" - - } - - } }, @@ -357,9 +389,10 @@ "clock": { "type": "boolean", - "description": "Type: `boolean`. 
In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock", - "help_text": "Type: `boolean`. In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock." - + "description": "Type: `boolean_true`, default: `false`. In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock", + "help_text": "Type: `boolean_true`, default: `false`. In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock." + , + "default": "False" } @@ -367,9 +400,10 @@ "polyA": { "type": "boolean", - "description": "Type: `boolean`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences", - "help_text": "Type: `boolean`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either \u0027AAAAAAAAAA\u0027 or \u0027TTTTTTTTTT\u0027. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3\u0027-end trimming) and T{150} for Read2 (5\u0027-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start (\"32:A:\") and end (\"_PolyA:32\") of the reads. 
The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user\u0027s responsibility to carry out an initial round of trimming." - + "description": "Type: `boolean_true`, default: `false`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences", + "help_text": "Type: `boolean_true`, default: `false`. This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either \u0027AAAAAAAAAA\u0027 or \u0027TTTTTTTTTT\u0027. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3\u0027-end trimming) and T{150} for Read2 (5\u0027-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user\u0027s responsibility to carry out an initial round of trimming." + , + "default": "False" } @@ -377,9 +411,10 @@ "implicon": { "type": "boolean", - "description": "Type: `boolean`. This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads", - "help_text": "Type: `boolean`. 
This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it\u0027s current implementation, the UMI carrying reads come in the following format\n Read 1 5\u0027 FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3\u0027\n Read 2 3\u0027 UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5\u0027\nWhere UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.\n" - + "description": "Type: `boolean_true`, default: `false`. This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads", + "help_text": "Type: `boolean_true`, default: `false`. This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it\u0027s current implementation, the UMI carrying reads come in the following format\n Read 1 5\u0027 FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3\u0027\n Read 2 3\u0027 UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5\u0027\nWhere UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.\n" + , + "default": "False" } @@ -397,9 +432,10 @@ "rrbs": { "type": "boolean", - "description": "Type: `boolean`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG)", - "help_text": "Type: `boolean`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). 
Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3\u0027 end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5\u0027 end (by setting \u0027--clip_r2 2\u0027). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3\u0027 MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." - + "description": "Type: `boolean_true`, default: `false`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG)", + "help_text": "Type: `boolean_true`, default: `false`. Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3\u0027 end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5\u0027 end (by setting \u0027--clip_r2 2\u0027). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3\u0027 MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." + , + "default": "False" } @@ -407,9 +443,10 @@ "non_directional": { "type": "boolean", - "description": "Type: `boolean`. Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs", - "help_text": "Type: `boolean`. 
Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs. Like with the option \u0027--rrbs\u0027 this avoids using cytosine positions that were filled-in during the end-repair step. \u0027--non_directional\u0027 requires \u0027--rrbs\u0027 to be specified as well. Note that this option does not set \u0027--clip_r2 2\u0027 in paired-end mode." - + "description": "Type: `boolean_true`, default: `false`. Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs", + "help_text": "Type: `boolean_true`, default: `false`. Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for \u0027CAA\u0027 or \u0027CGA\u0027 at the start of the read and, if found, removes the first two basepairs. Like with the option \u0027--rrbs\u0027 this avoids using cytosine positions that were filled-in during the end-repair step. \u0027--non_directional\u0027 requires \u0027--rrbs\u0027 to be specified as well. Note that this option does not set \u0027--clip_r2 2\u0027 in paired-end mode." + , + "default": "False" } @@ -417,9 +454,10 @@ "keep": { "type": "boolean", - "description": "Type: `boolean`. Keep the quality trimmed intermediate file", - "help_text": "Type: `boolean`. Keep the quality trimmed intermediate file." - + "description": "Type: `boolean_true`, default: `false`. Keep the quality trimmed intermediate file", + "help_text": "Type: `boolean_true`, default: `false`. Keep the quality trimmed intermediate file." + , + "default": "False" } @@ -437,9 +475,10 @@ "paired": { "type": "boolean", - "description": "Type: `boolean`. This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files", - "help_text": "Type: `boolean`. 
This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... ." - + "description": "Type: `boolean_true`, default: `false`. This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files", + "help_text": "Type: `boolean_true`, default: `false`. This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... ." + , + "default": "False" } @@ -447,9 +486,10 @@ "retain_unpaired": { "type": "boolean", - "description": "Type: `boolean`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027", - "help_text": "Type: `boolean`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027.unpaired_1.fq\u0027 or \u0027.unpaired_2.fq\u0027 output files. 
The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2." - + "description": "Type: `boolean_true`, default: `false`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027", + "help_text": "Type: `boolean_true`, default: `false`. If only one of the two paired-end reads became too short, the longer read will be written to either \u0027.unpaired_1.fq\u0027 or \u0027.unpaired_2.fq\u0027 output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2." + , + "default": "False" } @@ -487,10 +527,10 @@ "output_dir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output_dir.output_dir`. If specified all output will be written to this directory instead of the current directory", - "help_text": "Type: `file`, default: `$id.$key.output_dir.output_dir`. If specified all output will be written to this directory instead of the current directory." + "description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. If specified all output will be written to this directory instead of the current directory", + "help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. If specified all output will be written to this directory instead of the current directory." , - "default":"$id.$key.output_dir.output_dir" + "default": "$id.$key.output_dir.output_dir" } @@ -498,10 +538,10 @@ "trimmed_r1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trimmed_r1.gz`, example: `read_1.fastq.gz`. Output file for read 1", - "help_text": "Type: `file`, default: `$id.$key.trimmed_r1.gz`, example: `read_1.fastq.gz`. Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + "description": "Type: `file`, default: `$id.$key.trimmed_r1.fastq`, example: `read_1.fastq`. 
Output file for read 1", + "help_text": "Type: `file`, default: `$id.$key.trimmed_r1.fastq`, example: `read_1.fastq`. Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimmed_r1.gz" + "default": "$id.$key.trimmed_r1.fastq" } @@ -509,10 +549,10 @@ "trimmed_r2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.trimmed_r2.gz`, example: `read_2.fastq.gz`. Output file for read 2", - "help_text": "Type: `file`, default: `$id.$key.trimmed_r2.gz`, example: `read_2.fastq.gz`. Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." + "description": "Type: `file`, default: `$id.$key.trimmed_r2.fastq`, example: `read_2.fastq`. Output file for read 2", + "help_text": "Type: `file`, default: `$id.$key.trimmed_r2.fastq`, example: `read_2.fastq`. Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimmed_r2.gz" + "default": "$id.$key.trimmed_r2.fastq" } @@ -523,7 +563,7 @@ "description": "Type: `file`, default: `$id.$key.trimming_report_r1.txt`, example: `read_1.trimming_report.txt`. Trimming report for read 1", "help_text": "Type: `file`, default: `$id.$key.trimming_report_r1.txt`, example: `read_1.trimming_report.txt`. Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimming_report_r1.txt" + "default": "$id.$key.trimming_report_r1.txt" } @@ -534,7 +574,7 @@ "description": "Type: `file`, default: `$id.$key.trimming_report_r2.txt`, example: `read_2.trimming_report.txt`. Trimming report for read 1", "help_text": "Type: `file`, default: `$id.$key.trimming_report_r2.txt`, example: `read_2.trimming_report.txt`. Trimming report for read 1. 
Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimming_report_r2.txt" + "default": "$id.$key.trimming_report_r2.txt" } @@ -545,7 +585,7 @@ "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_1.html`, example: `read_1.fastqc.html`. FastQC report for trimmed (single-end) reads (or read 1 for paired-end)", "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_1.html`, example: `read_1.fastqc.html`. FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimmed_fastqc_html_1.html" + "default": "$id.$key.trimmed_fastqc_html_1.html" } @@ -556,7 +596,7 @@ "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_2.html`, example: `read_2.fastqc.html`. FastQC report for trimmed reads (read2 for paired-end)", "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_html_2.html`, example: `read_2.fastqc.html`. FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimmed_fastqc_html_2.html" + "default": "$id.$key.trimmed_fastqc_html_2.html" } @@ -567,7 +607,7 @@ "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_1.zip`, example: `read_1.fastqc.zip`. FastQC results for trimmed (single-end) reads (or read 1 for paired-end)", "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_1.zip`, example: `read_1.fastqc.zip`. FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." 
, - "default":"$id.$key.trimmed_fastqc_zip_1.zip" + "default": "$id.$key.trimmed_fastqc_zip_1.zip" } @@ -578,7 +618,7 @@ "description": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_2.zip`, example: `read_2.fastqc.zip`. FastQC results for trimmed reads (read2 for paired-end)", "help_text": "Type: `file`, default: `$id.$key.trimmed_fastqc_zip_2.zip`, example: `read_2.fastqc.zip`. FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.trimmed_fastqc_zip_2.zip" + "default": "$id.$key.trimmed_fastqc_zip_2.zip" } @@ -589,7 +629,7 @@ "description": "Type: `file`, default: `$id.$key.unpaired_r1.fastq`, example: `unpaired_read_1.fastq`. Output file for unpired read 1", "help_text": "Type: `file`, default: `$id.$key.unpaired_r1.fastq`, example: `unpaired_read_1.fastq`. Output file for unpired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." , - "default":"$id.$key.unpaired_r1.fastq" + "default": "$id.$key.unpaired_r1.fastq" } @@ -600,7 +640,7 @@ "description": "Type: `file`, default: `$id.$key.unpaired_r2.fastq`, example: `unpaired_read_2.fastq`. Output file for unpaired read 2", "help_text": "Type: `file`, default: `$id.$key.unpaired_r2.fastq`, example: `unpaired_read_2.fastq`. Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists." 
, - "default":"$id.$key.unpaired_r2.fastq" + "default": "$id.$key.unpaired_r2.fastq" } diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml new file mode 100644 index 0000000..0cd39bb --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/.config.vsh.yaml @@ -0,0 +1,638 @@ +name: "umi_tools_dedup" +namespace: "umi_tools" +version: "main" +authors: +- name: "Emma Rousseau" + roles: + - "author" + - "maintainer" + info: + links: + email: "emma@data-intuitive.com" + github: "emmarousseau" + linkedin: "emmarousseau1" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Bioinformatician" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + alternatives: + - "--stdin" + description: "Input BAM or SAM file. Use --in_sam to specify SAM format." + info: null + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--in_sam" + description: "By default, inputs are assumed to be in BAM format. Use this options\ + \ to specify the use of SAM\nformat for input.\n" + info: null + direction: "input" + - type: "file" + name: "--bai" + description: "BAM index" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--random_seed" + description: "Random seed to initialize number generator with." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + alternatives: + - "--stdout" + description: "Deduplicated BAM file." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--out_sam" + description: "By default, outputa are written in BAM format. Use this options\ + \ to specify the use of SAM format\nfor output.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--paired" + description: "BAM is paired end - output both read pairs. This will also force\ + \ the use of the template length\nto determine reads with the same mapping coordinates.\n" + info: null + direction: "input" + - type: "string" + name: "--output_stats" + description: "Generate files containing UMI based deduplication statistics files\ + \ with this prefix in the file names.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--extract_umi_method" + description: "Specify the method by which the barcodes were encoded in the read.\n\ + The options are:\n * read_id (default) \n * tag\n * umis\n" + info: null + example: + - "read_id" + required: false + choices: + - "read_id" + - "tag" + - "umis" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag" + description: "The tag containing the UMI sequence. This is only required if the\ + \ extract_umi_method is set to tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_separator" + description: "The separator used to separate the UMI from the read sequence. This\ + \ is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n" + info: null + example: + - "_" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag_split" + description: "Separate the UMI in tag by and take the first element." 
+ info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--umi_tag_delimiter" + description: "Separate the UMI in by and concatenate the elements." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag" + description: "The tag containing the cell barcode sequence. This is only required\ + \ if the extract_umi_method\nis set to tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag_split" + description: "Separate the cell barcode in tag by and take the first element." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--cell_tag_delimiter" + description: "Separate the cell barcode in by and concatenate the\ + \ elements." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Grouping Options" + arguments: + - type: "string" + name: "--method" + description: "The method to use for grouping reads. \nThe options are: \n * unique\n\ + \ * percentile\n * cluster\n * adjacency\n * directional (default)\n" + info: null + example: + - "directional" + required: false + choices: + - "unique" + - "percentile" + - "cluster" + - "adjacency" + - "directional" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--edit_distance_threshold" + description: "For the adjacency and cluster methods the threshold for the edit\ + \ distance to connect two\nUMIs in the network can be increased. The default\ + \ value of 1 works best unless the UMI is\nvery long (>14bp). 
Default: `1`.\n" + info: null + example: + - 1 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--spliced_is_unique" + description: "Causes two reads that start in the same position on the same strand\ + \ and having the same UMI\nto be considered unique if one is spliced and the\ + \ other is not. (Uses the 'N' cigar operation\nto test for splicing).\n" + info: null + direction: "input" + - type: "integer" + name: "--soft_clip_threshold" + description: "Mappers that soft clip will sometimes do so rather than mapping\ + \ a spliced read if there is only\na small overhang over the exon junction.\ + \ By setting this option, you can treat reads with at\nleast this many bases\ + \ soft-clipped at the 3' end as spliced. Default: `4`.\n" + info: null + example: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--multimapping_detection_method" + description: "If the sam/bam contains tags to identify multimapping reads, you\ + \ can specify for use when selecting\nthe best read at a given loci. Supported\ + \ tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest\ + \ mapping quality will be selected.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--read_length" + description: "Use the read length as a criteria when deduping, for e.g. sRNA-Seq." + info: null + direction: "input" +- name: "Single-cell RNA-Seq Options" + arguments: + - type: "boolean_true" + name: "--per_gene" + description: "Reads will be grouped together if they have the same gene. This\ + \ is useful if your library prep\ngenerates PCR duplicates with non identical\ + \ alignment positions such as CEL-Seq. Note this option\nis hardcoded to be\ + \ on with the count command. I.e. counting is always performed per-gene. 
Must\ + \ be\ncombined with either --gene_tag or --per_contig option.\n" + info: null + direction: "input" + - type: "string" + name: "--gene_tag" + description: "Deduplicate per gene. The gene information is encoded in the bam\ + \ read tag specified.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--assigned_status_tag" + description: "BAM tag which describes whether a read is assigned to a gene. Defaults\ + \ to the same value as given\nfor --gene_tag.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--skip_tags_regex" + description: "Use in conjunction with the --assigned_status_tag option to skip\ + \ any reads where the tag matches\nthis regex. Default (\"^[__|Unassigned]\"\ + ) matches anything which starts with \"__\" or \"Unassigned\".\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--per_contig" + description: "Deduplicate per contig (field 3 in BAM; RNAME). All reads with the\ + \ sam contig will be considered to\nhave the same alignment position. This is\ + \ useful if you have aligned to a reference transcriptome\nwith one transcript\ + \ per gene. 
If you have aligned to a transcriptome with more than one transcript\n\ + per gene, you can supply a map between transcripts and gene using the --gene_transcript_map\ + \ option.\n" + info: null + direction: "input" + - type: "file" + name: "--gene_transcript_map" + description: "A file containing a mapping between gene names and transcript names.\ + \ The file should be tab\nseparated with the gene name in the first column and\ + \ the transcript name in the second column.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--per_cell" + description: "Reads will only be grouped together if they have the same cell barcode.\ + \ Can be combined with\n--per_gene.\n" + info: null + direction: "input" +- name: "SAM/BAM Options" + arguments: + - type: "integer" + name: "--mapping_quality" + description: "Minimium mapping quality (MAPQ) for a read to be retained. Default:\ + \ `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--unmapped_reads" + description: "How unmapped reads should be handled. \nThe options are:\n * \"\ + discard\": Discard all unmapped reads. (default)\n * \"use\": If read2\ + \ is unmapped, deduplicate using read1 only. Requires --paired.\n * \"output\"\ + : Output unmapped reads/read pairs without UMI grouping/deduplication. Only\ + \ available in umi_tools group.\n" + info: null + example: + - "discard" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chimeric_pairs" + description: "How chimeric pairs should be handled. \nThe options are:\n * \"\ + discard\": Discard all chimeric read pairs.\n * \"use\": Deduplicate using\ + \ read1 only. (default)\n * \"output\": Output chimeric pairs without UMI\ + \ grouping/deduplication. 
Only available in\n umi_tools group.\n" + info: null + example: + - "use" + required: false + choices: + - "discard" + - "use" + - "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--unpaired_reads" + description: "How unpaired reads should be handled. \nThe options are: \n * \"\ + discard\": Discard all unpaired reads.\n * \"use\": Deduplicate\ + \ using read1 only. Requires --paired. (default)\n * \"output\": Output unpaired\ + \ reads without UMI grouping/deduplication. Only available\n \ + \ in umi_tools group.\n" + info: null + example: + - "use" + required: false + choices: + - "discard" + - "use" + - "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--ignore_umi" + description: "Ignore the UMI and group reads using mapping coordinates only." + info: null + direction: "input" + - type: "double" + name: "--subset" + description: "Only consider a fraction of the reads, chosen at random. This is\ + \ useful for doing saturation\nanalyses.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--chrom" + description: "Only consider a single chromosome. This is useful for debugging/testing\ + \ purposes." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Group/Dedup Options" + arguments: + - type: "boolean_true" + name: "--no_sort_output" + description: "By default, output is sorted. This involves the use of a temporary\ + \ unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n" + info: null + direction: "input" + - type: "boolean_true" + name: "--buffer_whole_contig" + description: "Forces dedup to parse an entire contig before yielding any reads\ + \ for deduplication. 
This is the\nonly way to absolutely guarantee that all\ + \ reads with the same start position are grouped together\nfor deduplication\ + \ since dedup uses the start position of the read, not the alignment coordinate\ + \ on\nwhich the reads are sorted. However, by default, dedup reads for another\ + \ 1000bp before outputting\nread groups which will avoid any reads being missed\ + \ with short read sequencing (<1000bp).\n" + info: null + direction: "input" +- name: "Common Options" + arguments: + - type: "file" + name: "--log" + alternatives: + - "-L" + description: "File with logging information." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--log2stderr" + description: "Send logging information to stderr." + info: null + direction: "input" + - type: "integer" + name: "--verbose" + alternatives: + - "-v" + description: "Log level. The higher, the more output. Default: `0`.\n" + info: null + example: + - 0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--error" + alternatives: + - "-E" + description: "File with error information." + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--temp_dir" + description: "Directory for temporary files. If not set, the bash environmental\ + \ variable TMPDIR is used.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--compresslevel" + description: "Level of Gzip compression to use. Default=6 matches GNU gzip rather\ + \ than python gzip default.\nDefault: `6`.\n" + info: null + example: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--timeit" + description: "Store timing information in file." 
+ info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_name" + description: "Name in timing file for this class of jobs. Default: `all`.\n" + info: null + example: + - "all" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_header" + description: "Add header for timing information." + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "bash_script" + path: "script.sh" + is_executable: true +description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\ + \ to the read.\n" +test_resources: +- type: "bash_script" + path: "test.sh" + is_executable: true +- type: "file" + path: "test_data" +info: null +status: "enabled" +requirements: + commands: + - "ps" +keywords: +- "umi_tools" +- "deduplication" +- "dedup" +license: "MIT" +references: + doi: + - "10.1101/gr.209601.116" +links: + repository: "https://github.com/CGATOxford/UMI-tools" + homepage: "https://umi-tools.readthedocs.io/en/latest/" + documentation: "https://umi-tools.readthedocs.io/en/latest/reference/dedup.html" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" 
+ mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1" + target_registry: "images.viash-hub.com" + target_tag: "main" + namespace_separator: "/" + setup: + - type: "docker" + run: + - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/umi_tools/umi_tools_dedup/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/umi_tools/umi_tools_dedup" + executable: "target/nextflow/umi_tools/umi_tools_dedup/main.nf" + viash_version: "0.9.0" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: 
"https://github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" +package_config: + name: "biobox" + version: "main" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null + viash_version: "0.9.0" + source: "src" + target: "target" + config_mods: + - ".requirements.commands := ['ps']\n" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" + organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf new file mode 100644 index 0000000..4540ecf --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf @@ -0,0 +1,4157 @@ +// umi_tools_dedup main +// +// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
+// +// Component authors: +// * Emma Rousseau (author, maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. 
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value instanceof String) { + try { + value = value.toInteger() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigInteger) { + value = value.intValue() + } + expectedClass = value instanceof Integer ? 
null : "Integer" + } else if (par.type == "long") { + // cast to long if need be + if (value instanceof String) { + try { + value = value.toLong() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof Integer) { + value = value.toLong() + } + expectedClass = value instanceof Long ? null : "Long" + } else if (par.type == "double") { + // cast to double if need be + if (value instanceof String) { + try { + value = value.toDouble() + } catch (NumberFormatException e) { + // do nothing + } + } + if (value instanceof java.math.BigDecimal) { + value = value.doubleValue() + } + if (value instanceof Float) { + value = value.toDouble() + } + expectedClass = value instanceof Double ? null : "Double" + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value instanceof String) { + def valueLower = value.toLowerCase() + if (valueLower == "true") { + value = true + } else if (valueLower == "false") { + value = false + } + } + expectedClass = value instanceof Boolean ? null : "Boolean" + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value instanceof GString) { + value = value.toString() + } + expectedClass = value instanceof String ? 
null : "String" + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required) { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _processOutputValues(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] 
+ } + } + return outputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets; the first element of each set is its id. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, String>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + // unique(false) returns a de-duplicated copy; a plain unique() would mutate ppIds + // in place and hide the duplicated ids from the error message below + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. + * + * @return A String containing the format of the parameter list file. 
+ */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List<Tuple2<String, Map<String, String>>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +/** + * Filter events on the uniqueness of their id (the first element of each tuple). + * + * @param args A Map of options. 'stopOnError': when true (also the default when the + * key is absent or null) an error is raised on a duplicate id; when false a + * warning is logged and the duplicate event is dropped. + * + * @return The result of `filter`, which only passes events whose id was not observed before. + */ +def checkUniqueIds(Map args) { + // default to stopping on duplicates, but honour an explicit 'stopOnError: false' + def stopOnError = args.stopOnError == null ? 
true : args.stopOnError + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). 
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
/**
 * Run a list of components on a stream of data.
 *
 * Deprecated: logs a warning on every call and points callers to runEach.
 * Each component gets its own branch of the input channel; the branches are
 * mixed into a single output channel.
 *
 * @param components: list of Viash VDSL3 modules to run (a single module is wrapped in a list)
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optional filtering step; without a filter the input is used as is
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optional id rewrite: a String replaces the id, a Closure computes it,
      // any other truthy value leaves the id untouched
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // derive the component input from the state; the original state is kept
      // right after it, so the tuple becomes [id, new_data, old_state, ...]
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map form: component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list form: same key in the state and in the component input
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // simplifyInput / simplifyOutput are always forced to false, overriding args.auto
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // NOTE(review): tup[2] is read as the old state, which assumes comp_.run
      // passes extra tuple elements through unchanged -- confirm against run()
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          // stays null when toState_ is neither a Map, a List nor a Closure
          def new_state = null
          if (toState_ instanceof Map) {
            // map form: state key -> component output name
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          // drop the output and the old state, keep any remaining elements
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}
/**
 * Run a list of components on a stream of data.
 *
 * Each component gets its own branch of the input channel; the branches are
 * mixed into a single output channel.
 *
 * @param components: list of Viash VDSL3 modules to run (a single module is wrapped in a list)
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure called with the id, the data and the component itself.
 *   Events for which it returns a falsy value skip the component and are passed through.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optional filtering step; without a filter the input is used as is
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optional id rewrite: the id is either given directly or computed by a
      // closure, and must be a String in both cases
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        // events satisfying runIf_ are run, all others bypass the component
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // derive the component input from the state; the original state is kept
      // right after it, so the tuple becomes [id, new_data, old_state, ...]
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            // map form: component argument name -> state key
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            // list form: same key in the state and in the component input
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      // simplifyInput / simplifyOutput are always forced to false, overriding args.auto
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // NOTE(review): tup[2] is read as the old state, which assumes comp_.run
      // passes extra tuple elements through unchanged -- confirm against run()
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          // stays null when toState_ is neither a Map, a List nor a Closure
          def new_state = null
          if (toState_ instanceof Map) {
            // map form: state key -> component output name
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          // drop the output and the old state, keep any remaining elements
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // passthrough events are appended as they are: neither fromState nor
      // toState is applied to them
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}
/**
 * Fill in the implicit defaults of a single argument definition.
 *
 * The map is modified in place and also returned.
 *
 * @param arg The argument definition as a map; must contain a 'name'.
 *
 * @return The same map with 'multiple', 'required', 'direction',
 *   'multiple_sep' and 'plainName' set, plus 'must_exist' / 'create_parent'
 *   for file arguments and a generated 'default' for output files and
 *   boolean_true / boolean_false flags.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  // strip the leading dashes: "--input" -> "input"
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""

    // The default is known to be null here, so only the example can provide
    // a file extension for the generated name.
    def extSearch = arg.example
    if (extSearch instanceof List) {
      // an empty list yields null, which is handled below
      extSearch = extSearch[0]
    }
    def ext = extSearch?.find("\\.[^\\.]+\$") ?: ""

    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  // single-valued arguments should not carry list-valued defaults or examples
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  // flags are off (boolean_true) or on (boolean_false) unless passed
  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}
/**
 * Render the help text of a single argument.
 *
 * @param param A processed argument definition (see _processArgument).
 *
 * @return A string starting with a newline and the argument name, followed
 *   by one line per non-empty property (type, default, example, choices,
 *   min, max) and the wrapped description, if any.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  // flags that are appended to the type, e.g. "file, required parameter"
  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // lists are joined with the argument's separator, everything else is stringified
  def stringify = { value ->
    if (value == null) {
      return null
    }
    value instanceof List ?
      value.join(param.multiple_sep != null ? param.multiple_sep : ", ") :
      value.toString()
  }
  def dflt = stringify(param.default)
  def example = stringify(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Literal replace() is used on purpose: with replaceAll() a backslash in
  // the replacement is an escape character, which used to leave quotes
  // unescaped and turned newlines into the letter 'n'.
  def escapeChoice = { choice ->
    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // keep every property on a single line by showing newlines as "\n"
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}
// based on Format._paragraphWrap
/**
 * Wrap a text to a maximum line length.
 *
 * The text is split into paragraphs on newlines and every paragraph is
 * wrapped separately. Words are never broken, so a single word longer than
 * maxLength ends up on a line of its own.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum number of characters per line.
 *
 * @return The wrapped lines, as a list of strings.
 */
def _paragraphWrap(str, maxLength) {
  def outLines = []
  for (par in str.split("\n")) {
    def words = par.split("\\s")

    // A paragraph made of whitespace only splits into zero words; keep it
    // as an empty line instead of failing on the missing first word.
    if (words.length == 0) {
      outLines.add("")
      continue
    }

    def line = words[0]
    for (int i = 1; i < words.length; i++) {
      def word = words[i]
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    // flush the last (possibly only) line of the paragraph
    outLines.add(line)
  }
  return outLines
}
/**
 * Turn a path given as a String into a Path, resolving relative paths
 * against the sibling of a reference path.
 *
 * @param str The path to resolve. Anything that is not a String is returned untouched.
 * @param parentPath The Path whose siblings relative paths are resolved against.
 *
 * @return For a String: the resolved path. For any other value: the value itself.
 */
def _resolveSiblingIfNotAbsolute(str, parentPath) {
  if (str instanceof String) {
    return _stringIsAbsolutePath(str)
      ? file(str, hidden: true)
      : parentPath.resolveSibling(str)
  }
  // not a String: nothing to resolve
  return str
}
/**
 * Tell whether a path, given as a String, is absolute.
 *
 * A path counts as absolute when, after an optional protocol prefix such as
 * 's3:' or 'file:', it starts with a slash that is followed by at least
 * one more character.
 *
 * @param path The path to check; must be a String.
 *
 * @return true if the path is absolute, false otherwise.
 */
def _stringIsAbsolutePath(path) {
  assert path instanceof String

  def absolutePattern = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/
  // '==~' requires the whole string to match the pattern
  return path ==~ absolutePattern
}
/**
 * Read a csv file into a list of maps.
 *
 * Lines starting with '#' are skipped. The first remaining line is the
 * header; every following line becomes a map from header name to value.
 * Fields may be wrapped in double quotes, in which case they may contain
 * commas. Empty fields are left out of the resulting map.
 *
 * @param file_path The csv file, as a Path or as anything accepted by file().
 *
 * @return A list with one map per data row.
 */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  // split on commas that are followed by an even number of double quotes,
  // i.e. commas that are not inside a quoted field
  def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')

  // strip the surrounding double quotes of a field, if any
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def row = -1
  def header = null

  // withCloseable closes the reader even when a row fails validation
  java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
    def line
    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }

      // limit -1 keeps trailing empty fields
      def fields = splitRegex.split(line, -1)

      if (header == null) {
        header = fields.collect{ field -> unquote(field) }
        continue
      }

      def data = fields.collect{ field -> field == "" ? null : unquote(field) }
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
  }
  assert header != null: "CSV file should contain a header"

  output
}
// Custom constructor to modify how certain objects are parsed from YAML
/**
 * SnakeYAML constructor which turns scalars tagged with '!file' into Paths.
 */
class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor {
  // directory that '!file' values are resolved against; may be null
  Path root

  /**
   * Builds a Path from the scalar value of a '!file' node.
   */
  class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct {
    public Object construct(org.yaml.snakeyaml.nodes.Node node) {
      String filename = (String) constructScalar(node);
      if (root != null) {
        // resolve against the root directory
        return root.resolve(filename);
      }
      // without a root the filename is used as given
      return java.nio.file.Paths.get(filename);
    }
  }

  /**
   * @param options The loader options, handed to the SnakeYAML constructor.
   * @param root The directory to resolve '!file' values against, or null.
   */
  CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) {
    super(options)
    this.root = root
    // Handle the !file tag: parse the value back into a Path
    this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath())
  }
}
// Custom representer to modify how certain objects are represented in YAML
/**
 * SnakeYAML representer which writes Path and File objects as scalars
 * tagged with '!file'.
 */
class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer {
  // path that written paths are relativized against; may be null
  Path relativizer

  /**
   * Represents a Path or File as a '!file' scalar.
   */
  class RepresentPath implements org.yaml.snakeyaml.representer.Represent {
    /**
     * @param obj A Path or a File.
     * @return The path as a string, relative to the relativizer if one is set.
     * @throws IllegalArgumentException if obj is neither a Path nor a File.
     */
    public String getFileName(Object obj) {
      if (obj instanceof File) {
        obj = ((File) obj).toPath();
      }
      if (obj !instanceof Path) {
        throw new IllegalArgumentException("Object: " + obj + " is not a Path or File");
      }
      def path = (Path) obj;

      if (relativizer != null) {
        return relativizer.relativize(path).toString()
      } else {
        return path.toString()
      }
    }

    public org.yaml.snakeyaml.nodes.Node representData(Object data) {
      String filename = getFileName(data);
      def tag = new org.yaml.snakeyaml.nodes.Tag("!file");
      return representScalar(tag, filename);
    }
  }
  /**
   * @param options The dumper options, handed to the SnakeYAML representer.
   * @param relativizer The path to relativize written paths against, or null.
   */
  CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) {
    super(options)
    this.relativizer = relativizer
    // NOTE(review): sun.nio.fs.UnixPath is a JDK-internal class; presumably it
    // is registered because this map is keyed by the concrete class of the
    // value -- confirm, and check other Path implementations (other
    // platforms, remote file systems) are still written as '!file'
    this.representers.put(sun.nio.fs.UnixPath, new RepresentPath())
    this.representers.put(Path, new RepresentPath())
    this.representers.put(File, new RepresentPath())
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf'
/**
 * Write data to a file as JSON.
 *
 * @param data The data to serialise; must not be null. Empty maps and lists,
 *   zero and false are valid values.
 * @param file The file to write to; must not be null.
 */
void writeJson(data, file) {
  // compare with null explicitly: a plain 'assert data' follows Groovy truth
  // and would also reject empty collections, 0 and false
  assert data != null: "writeJson: data should not be null"
  assert file != null: "writeJson: file should not be null"
  file.write(toJsonBlob(data))
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf'
/**
 * Write data to a file as YAML.
 *
 * @param data The data to serialise; must not be null. Empty maps and lists,
 *   zero and false are valid values.
 * @param file The file to write to; must not be null.
 */
void writeYaml(data, file) {
  // compare with null explicitly: a plain 'assert data' follows Groovy truth
  // and would also reject empty collections, 0 and false
  assert data != null: "writeYaml: data should not be null"
  assert file != null: "writeYaml: file should not be null"
  file.write(toYamlBlob(data))
}
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
/**
 * Recursively gather every File and Path found in an object.
 *
 * @param obj A File, a Path, a List, a Map (only its values are searched)
 *   or any other value.
 *
 * @return A flat list with the files that were found, in the order in which
 *   they were encountered; empty if there are none.
 */
def collectFiles(obj) {
  switch (obj) {
    case java.io.File:
    case Path:
      // a file is a leaf
      return [obj]
    case List:
      def found = []
      obj.each { item -> found.addAll(collectFiles(item)) }
      return found
    case Map:
      def found = []
      obj.each { key, item -> found.addAll(collectFiles(item)) }
      return found
    default:
      // any other value (including null) holds no files
      return []
  }
}
/**
 * Publish a state: write the yaml blob to the state file and copy every
 * input file to its output filename. The state file and the copies are
 * published to the publish directory.
 *
 * Input tuple: the id, the yaml blob, the name of the state file, the files
 * to copy and the filenames to copy them to (both in the same order).
 */
process publishStatesProc {
  // todo: check publishpath?
  publishDir path: "${getPublishDir()}/", mode: "copy"
  tag "$id"
  input:
    tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
  output:
    tuple val(id), path{[yamlFile] + outputFiles}
  script:
  // Wrap a value in single quotes for the shell. A single quote inside the
  // value is written as '"'"' (close the quote, add a quoted single quote,
  // reopen the quote), so yaml blobs and filenames containing single quotes
  // no longer break the generated script.
  def shQuote = { value ->
    "'" + value.toString().replace("'", "'\"'\"'") + "'"
  }
  // a single file arrives as a plain value instead of a list
  def copyCommands = [
    inputFiles instanceof List ? inputFiles : [inputFiles],
    outputFiles instanceof List ? outputFiles : [outputFiles]
  ]
    .transpose()
    .collectMany{infile, outfile ->
      if (infile.toString() != outfile.toString()) {
        [
          "[ -d \"\$(dirname ${shQuote(outfile)})\" ] || mkdir -p \"\$(dirname ${shQuote(outfile)})\"",
          "cp -r ${shQuote(infile)} ${shQuote(outfile)}"
        ]
      } else {
        // no need to copy if infile is the same as outfile
        []
      }
    }
  """
mkdir -p "\$(dirname ${shQuote(yamlFile)})"
echo "Storing state as yaml"
echo ${shQuote(yamlBlob)} > ${shQuote(yamlFile)}
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (key, value) are the tuples that will be saved to the state.yaml file + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the parameter is multiple: true and the template is a list, + // use the first element of that list as the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) +
.replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = val instanceof File ? val.toPath() : val + [value: value_, inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +/** + * Assert that a map only has expected keys and contains every required key. + * @param map The object to check; must be a Map. + * @param expectedKeys The keys that are allowed to occur in the map. + * @param requiredKeys The keys that must occur in the map. + * @param mapName Name of the map, used in the assertion messages. + */ +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the missing key itself; 'key' is not defined inside this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source 
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether auto exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + // append the transcripts entry to the existing publishDir list, or start a new list + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +/** + * Normalise the 'fromState' workflow argument. + * @param fromState null, a Closure, a Map[String, String] or a List[String]. + * @param key_ The module key, used in error messages. + * @param config_ The component config; its allArguments are used to look up required inputs. + * @return null when fromState is null, the closure itself when a Closure was given, otherwise + * a closure that selects the mapped keys from the state (second element of the tuple). + */ +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // use the config passed in as an argument rather than the script-level 'meta' + // NOTE(review): other helpers in this file compare it.direction against lowercase "input"; + // confirm the capitalised "Input" can match, otherwise this list is always empty + def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // copy the entry when the state has origkey; a missing key is only an + // error when origkey is listed in requiredInputNames + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + // NOTE(review): publishStatesByConfig compares it.direction against lowercase "output"; + // confirm the capitalised "Output" can match, otherwise this list is always empty + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + // the closure reads the output from index 1 and the state from index 2 of the tuple + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // copy the entry when the output has origkey; a missing key is only an + // error when origkey is listed in requiredOutputNames + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? 
+ chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutput = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + // check output tuple + | map { id_, output_ -> + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? 
+ output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _processOutputValues(output_, meta.config, id_, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { + output_ = output_.values()[0] + } + + [join_id, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublish = chNewState + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublish, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + + // remove join_id and meta + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] 
+ tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "umi_tools_dedup", + "namespace" : "umi_tools", + "version" : "main", + "authors" : [ + { + "name" : "Emma Rousseau", + "roles" : [ + "author", + "maintainer" + ], + "info" : { + "links" : { + "email" : "emma@data-intuitive.com", + "github" : "emmarousseau", + "linkedin" : "emmarousseau1" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Bioinformatician" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "--stdin" + ], + "description" : "Input BAM or SAM file. Use --in_sam to specify SAM format.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--in_sam", + "description" : "By default, inputs are assumed to be in BAM format. 
Use this options to specify the use of SAM\nformat for input.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--bai", + "description" : "BAM index", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--random_seed", + "description" : "Random seed to initialize number generator with.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "--stdout" + ], + "description" : "Deduplicated BAM file.", + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--out_sam", + "description" : "By default, outputa are written in BAM format. Use this options to specify the use of SAM format\nfor output.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--paired", + "description" : "BAM is paired end - output both read pairs. 
This will also force the use of the template length\nto determine reads with the same mapping coordinates.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--output_stats", + "description" : "Generate files containing UMI based deduplication statistics files with this prefix in the file names.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--extract_umi_method", + "description" : "Specify the method by which the barcodes were encoded in the read.\nThe options are:\n * read_id (default) \n * tag\n * umis\n", + "example" : [ + "read_id" + ], + "required" : false, + "choices" : [ + "read_id", + "tag", + "umis" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag", + "description" : "The tag containing the UMI sequence. This is only required if the extract_umi_method is set to tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_separator", + "description" : "The separator used to separate the UMI from the read sequence. This is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n", + "example" : [ + "_" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag_split", + "description" : "Separate the UMI in tag by and take the first element.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--umi_tag_delimiter", + "description" : "Separate the UMI in by and concatenate the elements.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag", + "description" : "The tag containing the cell barcode sequence. 
This is only required if the extract_umi_method\nis set to tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag_split", + "description" : "Separate the cell barcode in tag by and take the first element.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--cell_tag_delimiter", + "description" : "Separate the cell barcode in by and concatenate the elements.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Grouping Options", + "arguments" : [ + { + "type" : "string", + "name" : "--method", + "description" : "The method to use for grouping reads. \nThe options are: \n * unique\n * percentile\n * cluster\n * adjacency\n * directional (default)\n", + "example" : [ + "directional" + ], + "required" : false, + "choices" : [ + "unique", + "percentile", + "cluster", + "adjacency", + "directional" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--edit_distance_threshold", + "description" : "For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased. The default value of 1 works best unless the UMI is\nvery long (>14bp). Default: `1`.\n", + "example" : [ + 1 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--spliced_is_unique", + "description" : "Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not. 
(Uses the 'N' cigar operation\nto test for splicing).\n", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--soft_clip_threshold", + "description" : "Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction. By setting this option, you can treat reads with at\nleast this many bases soft-clipped at the 3' end as spliced. Default: `4`.\n", + "example" : [ + 4 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--multimapping_detection_method", + "description" : "If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci. Supported tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest mapping quality will be selected.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--read_length", + "description" : "Use the read length as a criteria when deduping, for e.g. sRNA-Seq.", + "direction" : "input" + } + ] + }, + { + "name" : "Single-cell RNA-Seq Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--per_gene", + "description" : "Reads will be grouped together if they have the same gene. This is useful if your library prep\ngenerates PCR duplicates with non identical alignment positions such as CEL-Seq. Note this option\nis hardcoded to be on with the count command. I.e. counting is always performed per-gene. Must be\ncombined with either --gene_tag or --per_contig option.\n", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--gene_tag", + "description" : "Deduplicate per gene. 
The gene information is encoded in the bam read tag specified.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--assigned_status_tag", + "description" : "BAM tag which describes whether a read is assigned to a gene. Defaults to the same value as given\nfor --gene_tag.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--skip_tags_regex", + "description" : "Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex. Default (\\"^[__|Unassigned]\\") matches anything which starts with \\"__\\" or \\"Unassigned\\".\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--per_contig", + "description" : "Deduplicate per contig (field 3 in BAM; RNAME). All reads with the sam contig will be considered to\nhave the same alignment position. This is useful if you have aligned to a reference transcriptome\nwith one transcript per gene. If you have aligned to a transcriptome with more than one transcript\nper gene, you can supply a map between transcripts and gene using the --gene_transcript_map option.\n", + "direction" : "input" + }, + { + "type" : "file", + "name" : "--gene_transcript_map", + "description" : "A file containing a mapping between gene names and transcript names. The file should be tab\nseparated with the gene name in the first column and the transcript name in the second column.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--per_cell", + "description" : "Reads will only be grouped together if they have the same cell barcode. 
Can be combined with\n--per_gene.\n", + "direction" : "input" + } + ] + }, + { + "name" : "SAM/BAM Options", + "arguments" : [ + { + "type" : "integer", + "name" : "--mapping_quality", + "description" : "Minimium mapping quality (MAPQ) for a read to be retained. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--unmapped_reads", + "description" : "How unmapped reads should be handled. \nThe options are:\n * \\"discard\\": Discard all unmapped reads. (default)\n * \\"use\\": If read2 is unmapped, deduplicate using read1 only. Requires --paired.\n * \\"output\\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available in umi_tools group.\n", + "example" : [ + "discard" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chimeric_pairs", + "description" : "How chimeric pairs should be handled. \nThe options are:\n * \\"discard\\": Discard all chimeric read pairs.\n * \\"use\\": Deduplicate using read1 only. (default)\n * \\"output\\": Output chimeric pairs without UMI grouping/deduplication. Only available in\n umi_tools group.\n", + "example" : [ + "use" + ], + "required" : false, + "choices" : [ + "discard", + "use", + "output" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--unpaired_reads", + "description" : "How unpaired reads should be handled. \nThe options are: \n * \\"discard\\": Discard all unmapped reads.\n * \\"use\\": If read2 is unmapped, deduplicate using read1 only. Requires --paired. (default)\n * \\"output\\": Output unmapped reads/read pairs without UMI grouping/deduplication. 
Only available\n in umi_tools group.\n", + "example" : [ + "use" + ], + "required" : false, + "choices" : [ + "discard", + "use", + "output" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--ignore_umi", + "description" : "Ignore the UMI and group reads using mapping coordinates only.", + "direction" : "input" + }, + { + "type" : "double", + "name" : "--subset", + "description" : "Only consider a fraction of the reads, chosen at random. This is useful for doing saturation\nanalyses.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--chrom", + "description" : "Only consider a single chromosome. This is useful for debugging/testing purposes.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Group/Dedup Options", + "arguments" : [ + { + "type" : "boolean_true", + "name" : "--no_sort_output", + "description" : "By default, output is sorted. This involves the use of a temporary unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n", + "direction" : "input" + }, + { + "type" : "boolean_true", + "name" : "--buffer_whole_contig", + "description" : "Forces dedup to parse an entire contig before yielding any reads for deduplication. This is the\nonly way to absolutely guarantee that all reads with the same start position are grouped together\nfor deduplication since dedup uses the start position of the read, not the alignment coordinate on\nwhich the reads are sorted. 
However, by default, dedup reads for another 1000bp before outputting\nread groups which will avoid any reads being missed with short read sequencing (<1000bp).\n", + "direction" : "input" + } + ] + }, + { + "name" : "Common Options", + "arguments" : [ + { + "type" : "file", + "name" : "--log", + "alternatives" : [ + "-L" + ], + "description" : "File with logging information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--log2stderr", + "description" : "Send logging information to stderr.", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Log level. The higher, the more output. Default: `0`.\n", + "example" : [ + 0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--error", + "alternatives" : [ + "-E" + ], + "description" : "File with error information.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--temp_dir", + "description" : "Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--compresslevel", + "description" : "Level of Gzip compression to use. 
Default=6 matches GNU gzip rather than python gzip default.\nDefault: `6`.\n", + "example" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--timeit", + "description" : "Store timing information in file.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_name", + "description" : "Name in timing file for this class of jobs. Default: `all`.\n", + "example" : [ + "all" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_header", + "description" : "Add header for timing information.", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "bash_script", + "path" : "script.sh", + "is_executable" : true + } + ], + "description" : "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", + "test_resources" : [ + { + "type" : "bash_script", + "path" : "test.sh", + "is_executable" : true + }, + { + "type" : "file", + "path" : "test_data" + } + ], + "status" : "enabled", + "requirements" : { + "commands" : [ + "ps" + ] + }, + "keywords" : [ + "umi_tools", + "deduplication", + "dedup" + ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1101/gr.209601.116" + ] + }, + "links" : { + "repository" : "https://github.com/CGATOxford/UMI-tools", + "homepage" : "https://umi-tools.readthedocs.io/en/latest/", + "documentation" : "https://umi-tools.readthedocs.io/en/latest/reference/dedup.html" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + 
"simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + } + }, + "debug" : 
false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1", + "target_registry" : "images.viash-hub.com", + "target_tag" : "main", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "docker", + "run" : [ + "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" + ] + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/umi_tools/umi_tools_dedup/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "target/nextflow/umi_tools/umi_tools_dedup", + "viash_version" : "0.9.0", + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" + }, + "package_config" : { + "name" : "biobox", + "version" : "main", + "description" : "A collection of bioinformatics tools for working with sequence data.\n", + "viash_version" : "0.9.0", + "source" : "src", + "target" : "target", + "config_mods" : [ + ".requirements.commands := ['ps']\n", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'main'" + ], + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } + } +}''')) +] + +// resolve dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.sh" +cat > "$tempscript" << VIASHMAIN +#!/bin/bash + +## VIASH START +# The following code has been auto-generated by Viash. +$( if [ !
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) +$( if [ ! -z ${VIASH_PAR_IN_SAM+x} ]; then echo "${VIASH_PAR_IN_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_in_sam='&'#" ; else echo "# par_in_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) +$( if [ ! -z ${VIASH_PAR_RANDOM_SEED+x} ]; then echo "${VIASH_PAR_RANDOM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_random_seed='&'#" ; else echo "# par_random_seed="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_OUT_SAM+x} ]; then echo "${VIASH_PAR_OUT_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_out_sam='&'#" ; else echo "# par_out_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) +$( if [ ! -z ${VIASH_PAR_EXTRACT_UMI_METHOD+x} ]; then echo "${VIASH_PAR_EXTRACT_UMI_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extract_umi_method='&'#" ; else echo "# par_extract_umi_method="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_TAG+x} ]; then echo "${VIASH_PAR_UMI_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag='&'#" ; else echo "# par_umi_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_separator='&'#" ; else echo "# par_umi_separator="; fi ) +$( if [ ! -z ${VIASH_PAR_UMI_TAG_SPLIT+x} ]; then echo "${VIASH_PAR_UMI_TAG_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag_split='&'#" ; else echo "# par_umi_tag_split="; fi ) +$( if [ ! 
-z ${VIASH_PAR_UMI_TAG_DELIMITER+x} ]; then echo "${VIASH_PAR_UMI_TAG_DELIMITER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_tag_delimiter='&'#" ; else echo "# par_umi_tag_delimiter="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG+x} ]; then echo "${VIASH_PAR_CELL_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag='&'#" ; else echo "# par_cell_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG_SPLIT+x} ]; then echo "${VIASH_PAR_CELL_TAG_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag_split='&'#" ; else echo "# par_cell_tag_split="; fi ) +$( if [ ! -z ${VIASH_PAR_CELL_TAG_DELIMITER+x} ]; then echo "${VIASH_PAR_CELL_TAG_DELIMITER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_cell_tag_delimiter='&'#" ; else echo "# par_cell_tag_delimiter="; fi ) +$( if [ ! -z ${VIASH_PAR_METHOD+x} ]; then echo "${VIASH_PAR_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_method='&'#" ; else echo "# par_method="; fi ) +$( if [ ! -z ${VIASH_PAR_EDIT_DISTANCE_THRESHOLD+x} ]; then echo "${VIASH_PAR_EDIT_DISTANCE_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_edit_distance_threshold='&'#" ; else echo "# par_edit_distance_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_SPLICED_IS_UNIQUE+x} ]; then echo "${VIASH_PAR_SPLICED_IS_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_spliced_is_unique='&'#" ; else echo "# par_spliced_is_unique="; fi ) +$( if [ ! -z ${VIASH_PAR_SOFT_CLIP_THRESHOLD+x} ]; then echo "${VIASH_PAR_SOFT_CLIP_THRESHOLD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_soft_clip_threshold='&'#" ; else echo "# par_soft_clip_threshold="; fi ) +$( if [ ! -z ${VIASH_PAR_MULTIMAPPING_DETECTION_METHOD+x} ]; then echo "${VIASH_PAR_MULTIMAPPING_DETECTION_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_multimapping_detection_method='&'#" ; else echo "# par_multimapping_detection_method="; fi ) +$( if [ ! -z ${VIASH_PAR_READ_LENGTH+x} ]; then echo "${VIASH_PAR_READ_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_read_length='&'#" ; else echo "# par_read_length="; fi ) +$( if [ ! 
-z ${VIASH_PAR_PER_GENE+x} ]; then echo "${VIASH_PAR_PER_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_gene='&'#" ; else echo "# par_per_gene="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE_TAG+x} ]; then echo "${VIASH_PAR_GENE_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene_tag='&'#" ; else echo "# par_gene_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_ASSIGNED_STATUS_TAG+x} ]; then echo "${VIASH_PAR_ASSIGNED_STATUS_TAG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_assigned_status_tag='&'#" ; else echo "# par_assigned_status_tag="; fi ) +$( if [ ! -z ${VIASH_PAR_SKIP_TAGS_REGEX+x} ]; then echo "${VIASH_PAR_SKIP_TAGS_REGEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_skip_tags_regex='&'#" ; else echo "# par_skip_tags_regex="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_CONTIG+x} ]; then echo "${VIASH_PAR_PER_CONTIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_contig='&'#" ; else echo "# par_per_contig="; fi ) +$( if [ ! -z ${VIASH_PAR_GENE_TRANSCRIPT_MAP+x} ]; then echo "${VIASH_PAR_GENE_TRANSCRIPT_MAP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gene_transcript_map='&'#" ; else echo "# par_gene_transcript_map="; fi ) +$( if [ ! -z ${VIASH_PAR_PER_CELL+x} ]; then echo "${VIASH_PAR_PER_CELL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_per_cell='&'#" ; else echo "# par_per_cell="; fi ) +$( if [ ! -z ${VIASH_PAR_MAPPING_QUALITY+x} ]; then echo "${VIASH_PAR_MAPPING_QUALITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_mapping_quality='&'#" ; else echo "# par_mapping_quality="; fi ) +$( if [ ! -z ${VIASH_PAR_UNMAPPED_READS+x} ]; then echo "${VIASH_PAR_UNMAPPED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unmapped_reads='&'#" ; else echo "# par_unmapped_reads="; fi ) +$( if [ ! -z ${VIASH_PAR_CHIMERIC_PAIRS+x} ]; then echo "${VIASH_PAR_CHIMERIC_PAIRS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chimeric_pairs='&'#" ; else echo "# par_chimeric_pairs="; fi ) +$( if [ ! -z ${VIASH_PAR_UNPAIRED_READS+x} ]; then echo "${VIASH_PAR_UNPAIRED_READS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_unpaired_reads='&'#" ; else echo "# par_unpaired_reads="; fi ) +$( if [ ! 
-z ${VIASH_PAR_IGNORE_UMI+x} ]; then echo "${VIASH_PAR_IGNORE_UMI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_umi='&'#" ; else echo "# par_ignore_umi="; fi ) +$( if [ ! -z ${VIASH_PAR_SUBSET+x} ]; then echo "${VIASH_PAR_SUBSET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subset='&'#" ; else echo "# par_subset="; fi ) +$( if [ ! -z ${VIASH_PAR_CHROM+x} ]; then echo "${VIASH_PAR_CHROM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chrom='&'#" ; else echo "# par_chrom="; fi ) +$( if [ ! -z ${VIASH_PAR_NO_SORT_OUTPUT+x} ]; then echo "${VIASH_PAR_NO_SORT_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_no_sort_output='&'#" ; else echo "# par_no_sort_output="; fi ) +$( if [ ! -z ${VIASH_PAR_BUFFER_WHOLE_CONTIG+x} ]; then echo "${VIASH_PAR_BUFFER_WHOLE_CONTIG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_buffer_whole_contig='&'#" ; else echo "# par_buffer_whole_contig="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG2STDERR+x} ]; then echo "${VIASH_PAR_LOG2STDERR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log2stderr='&'#" ; else echo "# par_log2stderr="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR+x} ]; then echo "${VIASH_PAR_ERROR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error='&'#" ; else echo "# par_error="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMP_DIR+x} ]; then echo "${VIASH_PAR_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temp_dir='&'#" ; else echo "# par_temp_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSLEVEL+x} ]; then echo "${VIASH_PAR_COMPRESSLEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compresslevel='&'#" ; else echo "# par_compresslevel="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT+x} ]; then echo "${VIASH_PAR_TIMEIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit='&'#" ; else echo "# par_timeit="; fi ) +$( if [ ! 
-z ${VIASH_PAR_TIMEIT_NAME+x} ]; then echo "${VIASH_PAR_TIMEIT_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_name='&'#" ; else echo "# par_timeit_name="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_HEADER+x} ]; then echo "${VIASH_PAR_TIMEIT_HEADER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_header='&'#" ; else echo "# par_timeit_header="; fi ) +$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) +$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) +$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) +$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) +$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) +$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) +$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) +$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) +$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) + +## VIASH END + +set -e + +test_dir="\\${metal_executable}/test_data" + +unset_if_false=( + par_paired + par_in_sam + par_out_sam + par_spliced_is_unique + par_per_gene + par_per_contig + par_per_cell + par_no_sort_output + par_buffer_whole_contig + par_ignore_umi + par_subset + par_log2stderr + par_read_length +) + +for par in \\${unset_if_false[@]}; do + test_val="\\${!par}" + [[ "\\$test_val" == "false" ]] && unset \\$par +done + +umi_tools dedup \\\\ + --stdin "\\$par_input" \\\\ + \\${par_in_sam:+--in-sam} \\\\ + --stdout "\\$par_output" \\\\ + \\${par_out_sam:+--out-sam} \\\\ + \\${par_paired:+--paired} \\\\ + \\${par_output_stats:+--output-stats "\\$par_output_stats"} \\\\ + \\${par_extract_umi_method:+--extract-umi-method "\\$par_extract_umi_method"} \\\\ + \\${par_umi_tag:+--umi-tag "\\$par_umi_tag"} \\\\ + \\${par_umi_separator:+--umi-separator "\\$par_umi_separator"} \\\\ + \\${par_umi_tag_split:+--umi-tag-split "\\$par_umi_tag_split"} \\\\ + \\${par_umi_tag_delimiter:+--umi-tag-delimiter "\\$par_umi_tag_delimiter"} \\\\ + \\${par_cell_tag:+--cell-tag "\\$par_cell_tag"} \\\\ + \\${par_cell_tag_split:+--cell-tag-split "\\$par_cell_tag_split"} \\\\ + \\${par_cell_tag_delimiter:+--cell-tag-delimiter "\\$par_cell_tag_delimiter"} \\\\ + \\${par_method:+--method "\\$par_method"} \\\\ + \\${par_edit_distance_threshold:+--edit-distance-threshold "\\$par_edit_distance_threshold"} \\\\ + \\${par_spliced_is_unique:+--spliced-is-unique} \\\\ + \\${par_soft_clip_threshold:+--soft-clip-threshold "\\$par_soft_clip_threshold"} \\\\ + \\${par_multimapping_detection_method:+--multimapping-detection-method "\\$par_multimapping_detection_method"} \\\\ + \\${par_read_length:+--read-length} \\\\ + \\${par_per_gene:+--per-gene} \\\\ + \\${par_gene_tag:+--gene-tag "\\$par_gene_tag"} \\\\ + 
\\${par_assigned_status_tag:+--assigned-status-tag "\\$par_assigned_status_tag"} \\\\ + \\${par_skip_tags_regex:+--skip-tags-regex "\\$par_skip_tags_regex"} \\\\ + \\${par_per_contig:+--per-contig} \\\\ + \\${par_gene_transcript_map:+--gene-transcript-map "\\$par_gene_transcript_map"} \\\\ + \\${par_per_cell:+--per-cell} \\\\ + \\${par_mapping_quality:+--mapping-quality "\\$par_mapping_quality"} \\\\ + \\${par_unmapped_reads:+--unmapped-reads "\\$par_unmapped_reads"} \\\\ + \\${par_chimeric_pairs:+--chimeric-pairs "\\$par_chimeric_pairs"} \\\\ + \\${par_unpaired_reads:+--unpaired-reads "\\$par_unpaired_reads"} \\\\ + \\${par_ignore_umi:+--ignore-umi} \\\\ + \\${par_subset:+--subset "\\$par_subset"} \\\\ + \\${par_chrom:+--chrom "\\$par_chrom"} \\\\ + \\${par_no_sort_output:+--no-sort-output} \\\\ + \\${par_buffer_whole_contig:+--buffer-whole-contig} \\\\ + \\${par_log:+-L "\\$par_log"} \\\\ + \\${par_log2stderr:+--log2stderr} \\\\ + \\${par_verbose:+-v "\\$par_verbose"} \\\\ + \\${par_error:+-E "\\$par_error"} \\\\ + \\${par_temp_dir:+--temp-dir "\\$par_temp_dir"} \\\\ + \\${par_compresslevel:+--compresslevel "\\$par_compresslevel"} \\\\ + \\${par_timeit:+--timeit "\\$par_timeit"} \\\\ + \\${par_timeit_name:+--timeit-name "\\$par_timeit_name"} \\\\ + \\${par_timeit_header:+--timeit-header "\\$par_timeit_header"} \\\\ + \\${par_random_seed:+--random-seed "\\$par_random_seed"} + +exit 0 +VIASHMAIN +bash "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. 
+ */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + 
.collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + } + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." 
+ } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? 
+ if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = new nextflow.script.ScriptParser(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new 
nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/biobox/umi_tools/umi_tools_dedup", + "tag" : "main" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
+ // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/umitools/umitools_dedup/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow.config similarity index 98% rename from target/nextflow/umitools/umitools_dedup/nextflow.config rename to target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow.config index e3f25a6..7e0694a 100644 --- a/target/nextflow/umitools/umitools_dedup/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow.config @@ -1,9 +1,10 @@ manifest { - name = 
'umitools/umitools_dedup' + name = 'umi_tools/umi_tools_dedup' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' description = 'Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n' + author = 'Emma Rousseau' } process.container = 'nextflow/bash:latest' diff --git a/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json new file mode 100644 index 0000000..834b53a --- /dev/null +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/nextflow_schema.json @@ -0,0 +1,635 @@ +{ +"$schema": "http://json-schema.org/draft-07/schema", +"title": "umi_tools_dedup", +"description": "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", +"type": "object", +"definitions": { + + + + "inputs" : { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + + + "input": { + "type": + "string", + "description": "Type: `file`, required. Input BAM or SAM file", + "help_text": "Type: `file`, required. Input BAM or SAM file. Use --in_sam to specify SAM format." + + } + + + , + "in_sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, inputs are assumed to be in BAM format", + "help_text": "Type: `boolean_true`, default: `false`. By default, inputs are assumed to be in BAM format. Use this options to specify the use of SAM\nformat for input.\n" + , + "default": "False" + } + + + , + "bai": { + "type": + "string", + "description": "Type: `file`. BAM index", + "help_text": "Type: `file`. BAM index" + + } + + + , + "random_seed": { + "type": + "integer", + "description": "Type: `integer`. Random seed to initialize number generator with", + "help_text": "Type: `integer`. Random seed to initialize number generator with." 
+ + } + + +} +}, + + + "outputs" : { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + + + "output": { + "type": + "string", + "description": "Type: `file`, required, default: `$id.$key.output.output`. Deduplicated BAM file", + "help_text": "Type: `file`, required, default: `$id.$key.output.output`. Deduplicated BAM file." + , + "default": "$id.$key.output.output" + } + + + , + "out_sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, outputa are written in BAM format", + "help_text": "Type: `boolean_true`, default: `false`. By default, outputa are written in BAM format. Use this options to specify the use of SAM format\nfor output.\n" + , + "default": "False" + } + + + , + "paired": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. BAM is paired end - output both read pairs", + "help_text": "Type: `boolean_true`, default: `false`. BAM is paired end - output both read pairs. This will also force the use of the template length\nto determine reads with the same mapping coordinates.\n" + , + "default": "False" + } + + + , + "output_stats": { + "type": + "string", + "description": "Type: `string`. Generate files containing UMI based deduplication statistics files with this prefix in the file names", + "help_text": "Type: `string`. Generate files containing UMI based deduplication statistics files with this prefix in the file names.\n" + + } + + + , + "extract_umi_method": { + "type": + "string", + "description": "Type: `string`, example: `read_id`, choices: ``read_id`, `tag`, `umis``. Specify the method by which the barcodes were encoded in the read", + "help_text": "Type: `string`, example: `read_id`, choices: ``read_id`, `tag`, `umis``. 
Specify the method by which the barcodes were encoded in the read.\nThe options are:\n * read_id (default) \n * tag\n * umis\n", + "enum": ["read_id", "tag", "umis"] + + + } + + + , + "umi_tag": { + "type": + "string", + "description": "Type: `string`. The tag containing the UMI sequence", + "help_text": "Type: `string`. The tag containing the UMI sequence. This is only required if the extract_umi_method is set to tag.\n" + + } + + + , + "umi_separator": { + "type": + "string", + "description": "Type: `string`, example: `_`. The separator used to separate the UMI from the read sequence", + "help_text": "Type: `string`, example: `_`. The separator used to separate the UMI from the read sequence. This is only required if the\nextract_umi_method is set to id_read. Default: `_`.\n" + + } + + + , + "umi_tag_split": { + "type": + "string", + "description": "Type: `string`. Separate the UMI in tag by \u003cSPLIT\u003e and take the first element", + "help_text": "Type: `string`. Separate the UMI in tag by \u003cSPLIT\u003e and take the first element." + + } + + + , + "umi_tag_delimiter": { + "type": + "string", + "description": "Type: `string`. Separate the UMI in by \u003cDELIMITER\u003e and concatenate the elements", + "help_text": "Type: `string`. Separate the UMI in by \u003cDELIMITER\u003e and concatenate the elements." + + } + + + , + "cell_tag": { + "type": + "string", + "description": "Type: `string`. The tag containing the cell barcode sequence", + "help_text": "Type: `string`. The tag containing the cell barcode sequence. This is only required if the extract_umi_method\nis set to tag.\n" + + } + + + , + "cell_tag_split": { + "type": + "string", + "description": "Type: `string`. Separate the cell barcode in tag by \u003cSPLIT\u003e and take the first element", + "help_text": "Type: `string`. Separate the cell barcode in tag by \u003cSPLIT\u003e and take the first element." 
+ + } + + + , + "cell_tag_delimiter": { + "type": + "string", + "description": "Type: `string`. Separate the cell barcode in by \u003cDELIMITER\u003e and concatenate the elements", + "help_text": "Type: `string`. Separate the cell barcode in by \u003cDELIMITER\u003e and concatenate the elements." + + } + + +} +}, + + + "grouping options" : { + "title": "Grouping Options", + "type": "object", + "description": "No description", + "properties": { + + + "method": { + "type": + "string", + "description": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. The method to use for grouping reads", + "help_text": "Type: `string`, example: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. The method to use for grouping reads. \nThe options are: \n * unique\n * percentile\n * cluster\n * adjacency\n * directional (default)\n", + "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] + + + } + + + , + "edit_distance_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `1`. For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased", + "help_text": "Type: `integer`, example: `1`. For the adjacency and cluster methods the threshold for the edit distance to connect two\nUMIs in the network can be increased. The default value of 1 works best unless the UMI is\nvery long (\u003e14bp). Default: `1`.\n" + + } + + + , + "spliced_is_unique": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not", + "help_text": "Type: `boolean_true`, default: `false`. 
Causes two reads that start in the same position on the same strand and having the same UMI\nto be considered unique if one is spliced and the other is not. (Uses the \u0027N\u0027 cigar operation\nto test for splicing).\n" + , + "default": "False" + } + + + , + "soft_clip_threshold": { + "type": + "integer", + "description": "Type: `integer`, example: `4`. Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction", + "help_text": "Type: `integer`, example: `4`. Mappers that soft clip will sometimes do so rather than mapping a spliced read if there is only\na small overhang over the exon junction. By setting this option, you can treat reads with at\nleast this many bases soft-clipped at the 3\u0027 end as spliced. Default: `4`.\n" + + } + + + , + "multimapping_detection_method": { + "type": + "string", + "description": "Type: `string`. If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci", + "help_text": "Type: `string`. If the sam/bam contains tags to identify multimapping reads, you can specify for use when selecting\nthe best read at a given loci. Supported tags are `NH`, `X0` and `XT`. If not specified, the read\nwith the highest mapping quality will be selected.\n" + + } + + + , + "read_length": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Use the read length as a criteria when deduping, for e", + "help_text": "Type: `boolean_true`, default: `false`. Use the read length as a criteria when deduping, for e.g. sRNA-Seq." + , + "default": "False" + } + + +} +}, + + + "single-cell rna-seq options" : { + "title": "Single-cell RNA-Seq Options", + "type": "object", + "description": "No description", + "properties": { + + + "per_gene": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Reads will be grouped together if they have the same gene", + "help_text": "Type: `boolean_true`, default: `false`. Reads will be grouped together if they have the same gene. This is useful if your library prep\ngenerates PCR duplicates with non identical alignment positions such as CEL-Seq. Note this option\nis hardcoded to be on with the count command. I.e. counting is always performed per-gene. Must be\ncombined with either --gene_tag or --per_contig option.\n" + , + "default": "False" + } + + + , + "gene_tag": { + "type": + "string", + "description": "Type: `string`. Deduplicate per gene", + "help_text": "Type: `string`. Deduplicate per gene. The gene information is encoded in the bam read tag specified.\n" + + } + + + , + "assigned_status_tag": { + "type": + "string", + "description": "Type: `string`. BAM tag which describes whether a read is assigned to a gene", + "help_text": "Type: `string`. BAM tag which describes whether a read is assigned to a gene. Defaults to the same value as given\nfor --gene_tag.\n" + + } + + + , + "skip_tags_regex": { + "type": + "string", + "description": "Type: `string`. Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex", + "help_text": "Type: `string`. Use in conjunction with the --assigned_status_tag option to skip any reads where the tag matches\nthis regex. Default (\"^[__|Unassigned]\") matches anything which starts with \"__\" or \"Unassigned\".\n" + + } + + + , + "per_contig": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Deduplicate per contig (field 3 in BAM; RNAME)", + "help_text": "Type: `boolean_true`, default: `false`. Deduplicate per contig (field 3 in BAM; RNAME). All reads with the sam contig will be considered to\nhave the same alignment position. This is useful if you have aligned to a reference transcriptome\nwith one transcript per gene. 
If you have aligned to a transcriptome with more than one transcript\nper gene, you can supply a map between transcripts and gene using the --gene_transcript_map option.\n" + , + "default": "False" + } + + + , + "gene_transcript_map": { + "type": + "string", + "description": "Type: `file`. A file containing a mapping between gene names and transcript names", + "help_text": "Type: `file`. A file containing a mapping between gene names and transcript names. The file should be tab\nseparated with the gene name in the first column and the transcript name in the second column.\n" + + } + + + , + "per_cell": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Reads will only be grouped together if they have the same cell barcode", + "help_text": "Type: `boolean_true`, default: `false`. Reads will only be grouped together if they have the same cell barcode. Can be combined with\n--per_gene.\n" + , + "default": "False" + } + + +} +}, + + + "sam/bam options" : { + "title": "SAM/BAM Options", + "type": "object", + "description": "No description", + "properties": { + + + "mapping_quality": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Minimium mapping quality (MAPQ) for a read to be retained", + "help_text": "Type: `integer`, example: `0`. Minimium mapping quality (MAPQ) for a read to be retained. Default: `0`.\n" + + } + + + , + "unmapped_reads": { + "type": + "string", + "description": "Type: `string`, example: `discard`. How unmapped reads should be handled", + "help_text": "Type: `string`, example: `discard`. How unmapped reads should be handled. \nThe options are:\n * \"discard\": Discard all unmapped reads. (default)\n * \"use\": If read2 is unmapped, deduplicate using read1 only. Requires --paired.\n * \"output\": Output unmapped reads/read pairs without UMI grouping/deduplication. 
Only available in umi_tools group.\n" + + } + + + , + "chimeric_pairs": { + "type": + "string", + "description": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How chimeric pairs should be handled", + "help_text": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How chimeric pairs should be handled. \nThe options are:\n * \"discard\": Discard all chimeric read pairs.\n * \"use\": Deduplicate using read1 only. (default)\n * \"output\": Output chimeric pairs without UMI grouping/deduplication. Only available in\n umi_tools group.\n", + "enum": ["discard", "use", "output"] + + + } + + + , + "unpaired_reads": { + "type": + "string", + "description": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How unpaired reads should be handled", + "help_text": "Type: `string`, example: `use`, choices: ``discard`, `use`, `output``. How unpaired reads should be handled. \nThe options are: \n * \"discard\": Discard all unmapped reads.\n * \"use\": If read2 is unmapped, deduplicate using read1 only. Requires --paired. (default)\n * \"output\": Output unmapped reads/read pairs without UMI grouping/deduplication. Only available\n in umi_tools group.\n", + "enum": ["discard", "use", "output"] + + + } + + + , + "ignore_umi": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Ignore the UMI and group reads using mapping coordinates only", + "help_text": "Type: `boolean_true`, default: `false`. Ignore the UMI and group reads using mapping coordinates only." + , + "default": "False" + } + + + , + "subset": { + "type": + "number", + "description": "Type: `double`. Only consider a fraction of the reads, chosen at random", + "help_text": "Type: `double`. Only consider a fraction of the reads, chosen at random. This is useful for doing saturation\nanalyses.\n" + + } + + + , + "chrom": { + "type": + "string", + "description": "Type: `string`. 
Only consider a single chromosome", + "help_text": "Type: `string`. Only consider a single chromosome. This is useful for debugging/testing purposes." + + } + + +} +}, + + + "group/dedup options" : { + "title": "Group/Dedup Options", + "type": "object", + "description": "No description", + "properties": { + + + "no_sort_output": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. By default, output is sorted", + "help_text": "Type: `boolean_true`, default: `false`. By default, output is sorted. This involves the use of a temporary unsorted file (saved in\n--temp_dir). Use this option to turn off sorting.\n" + , + "default": "False" + } + + + , + "buffer_whole_contig": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Forces dedup to parse an entire contig before yielding any reads for deduplication", + "help_text": "Type: `boolean_true`, default: `false`. Forces dedup to parse an entire contig before yielding any reads for deduplication. This is the\nonly way to absolutely guarantee that all reads with the same start position are grouped together\nfor deduplication since dedup uses the start position of the read, not the alignment coordinate on\nwhich the reads are sorted. However, by default, dedup reads for another 1000bp before outputting\nread groups which will avoid any reads being missed with short read sequencing (\u003c1000bp).\n" + , + "default": "False" + } + + +} +}, + + + "common options" : { + "title": "Common Options", + "type": "object", + "description": "No description", + "properties": { + + + "log": { + "type": + "string", + "description": "Type: `file`. File with logging information", + "help_text": "Type: `file`. File with logging information." + + } + + + , + "log2stderr": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Send logging information to stderr", + "help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr." 
+ , + "default": "False" + } + + + , + "verbose": { + "type": + "integer", + "description": "Type: `integer`, example: `0`. Log level", + "help_text": "Type: `integer`, example: `0`. Log level. The higher, the more output. Default: `0`.\n" + + } + + + , + "error": { + "type": + "string", + "description": "Type: `file`. File with error information", + "help_text": "Type: `file`. File with error information." + + } + + + , + "temp_dir": { + "type": + "string", + "description": "Type: `string`. Directory for temporary files", + "help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable TMPDIR is used.\n" + + } + + + , + "compresslevel": { + "type": + "integer", + "description": "Type: `integer`, example: `6`. Level of Gzip compression to use", + "help_text": "Type: `integer`, example: `6`. Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default.\nDefault: `6`.\n" + + } + + + , + "timeit": { + "type": + "string", + "description": "Type: `file`. Store timing information in file", + "help_text": "Type: `file`. Store timing information in file." + + } + + + , + "timeit_name": { + "type": + "string", + "description": "Type: `string`, example: `all`. Name in timing file for this class of jobs", + "help_text": "Type: `string`, example: `all`. Name in timing file for this class of jobs. Default: `all`.\n" + + } + + + , + "timeit_header": { + "type": + "string", + "description": "Type: `string`. Add header for timing information", + "help_text": "Type: `string`. Add header for timing information." + + } + + +} +}, + + + "nextflow input-output arguments" : { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. 
Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + + + "publish_dir": { + "type": + "string", + "description": "Type: `string`, required, example: `output/`. Path to an output directory", + "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." + + } + + + , + "param_list": { + "type": + "string", + "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", + "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", + "hidden": true + + } + + +} +} +}, +"allOf": [ + + { + "$ref": "#/definitions/inputs" + }, + + { + "$ref": "#/definitions/outputs" + }, + + { + "$ref": "#/definitions/grouping options" + }, + + { + "$ref": "#/definitions/single-cell rna-seq options" + }, + + { + "$ref": "#/definitions/sam/bam options" + }, + + { + "$ref": "#/definitions/group/dedup options" + }, + + { + "$ref": "#/definitions/common options" + }, + + { + "$ref": "#/definitions/nextflow input-output arguments" + } +] +} diff --git a/target/executable/kallisto/kallisto_quant/.config.vsh.yaml b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml similarity index 57% rename from target/executable/kallisto/kallisto_quant/.config.vsh.yaml rename to target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml index 3016f58..aacf4b2 100644 --- a/target/executable/kallisto/kallisto_quant/.config.vsh.yaml +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/.config.vsh.yaml @@ -1,80 +1,20 @@ -name: "kallisto_quant" -namespace: "kallisto" +name: "umi_tools_prepareforrsem" +namespace: "umi_tools" version: "main" argument_groups: - name: "Input" arguments: - type: "file" name: "--input" - description: "List of input FastQ files of size 1 and 2 for single-end and paired-end\ - \ data, respectively." + alternatives: + - "-I" + - "--stdin" info: null + example: + - "$id.transcriptome.bam" must_exist: true create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: "," - - type: "boolean" - name: "--paired" - description: "Paired reads or not." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--strandedness" - description: "Sample strand-specificity." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--index" - description: "Kallisto genome index." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--gtf" - description: "Optional gtf file for translation of transcripts into genomic coordinates." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--chromosomes" - description: "Optional tab separated file with chromosome names and lengths." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--fragment_length" - description: "For single-end mode only, the estimated average fragment length." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--fragment_length_sd" - description: "For single-end mode only, the estimated standard deviation of the\ - \ fragment length." - info: null - required: false + required: true direction: "input" multiple: false multiple_sep: ";" @@ -82,10 +22,12 @@ argument_groups: arguments: - type: "file" name: "--output" - description: "Kallisto quant results" + alternatives: + - "-S" + - "--stdout" info: null - default: - - "$id.kallisto_quant_results" + example: + - "$id.transcriptome_sorted.bam" must_exist: true create_parent: true required: false @@ -94,10 +36,10 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--log" - description: "File containing log information from running kallisto quant" + alternatives: + - "-L" + description: "File with logging information [default = stdout]." 
info: null - default: - - "$id.kallisto_quant.log.txt" must_exist: true create_parent: true required: false @@ -105,64 +47,128 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--run_info" - description: "A json file containing information about the run" + name: "--error" + alternatives: + - "-E" + description: "File with error information [default = stderr]." info: null - default: - - "$id.run_info.json" must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - - type: "file" - name: "--quant_results_file" - description: "TSV file containing abundance estimates from Kallisto" + - type: "boolean_true" + name: "--log2stderr" + description: "Send logging information to stderr [default = False]." + info: null + direction: "input" + - type: "string" + name: "--temp_dir" + description: "Directory for temporary files. If not set, the bash environmental\ + \ variable \nTMPDIR is used.\n" info: null - default: - - "$id.abundance.tsv" - must_exist: true - create_parent: true required: false - direction: "output" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--compresslevel" + description: "Level of Gzip compression to use. Default (6) matchesGNU gzip rather\ + \ than python \ngzip default (which is 9).\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Options" + arguments: + - type: "string" + name: "--tags" + description: "Comma-seperated list of tags to transfer from read1 to read2 (Default:\ + \ 'UG,BX')\n" + info: null + example: + - "UG,BX" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--sam" + description: "Input and output SAM rather than BAM." 
+ info: null + direction: "input" + - type: "string" + name: "--timeit" + description: "Store timeing information in file [none].\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--timeit_name" + description: "Name in timing file for this class of jobs [all].\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--timeit_header" + description: "Add header for timing information [none]." + info: null + direction: "input" + - type: "integer" + name: "--verbose" + alternatives: + - "-v" + description: "Loglevel [1]. The higher, the more output.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--random_seed" + description: "Random seed to initialize number generator with [none].\n" + info: null + required: false + direction: "input" multiple: false multiple_sep: ";" resources: - type: "bash_script" path: "script.sh" is_executable: true -description: "Computes equivalence classes for reads and quantifies abundances.\n" +- type: "file" + path: "prepare-for-rsem.py" +description: "Make the output from umi-tools dedup or group compatible with RSEM" test_resources: - type: "bash_script" path: "test.sh" is_executable: true - type: "file" - path: "transcriptome.fasta" -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/kallisto/quant/main.nf" - - "modules/nf-core/kallisto/quant/meta.yml" - last_sha: "aff1d2e02717247831644769fc3ba84868c3fdde" + path: "test_data" +info: null status: "enabled" requirements: commands: - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" +keywords: +- "umi_tools" +- "rsem" +- "bam" 
+- "sam" +license: "MIT" +references: + doi: + - "10.1101/gr.209601.116" +links: + repository: "https://github.com/CGATOxford/UMI-tools" + homepage: "https://umi-tools.readthedocs.io/en/latest/" + documentation: "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" runners: - type: "executable" id: "executable" @@ -231,53 +237,47 @@ runners: engines: - type: "docker" id: "docker" - image: "ubuntu:22.04" + image: "quay.io/biocontainers/umi_tools:1.1.5--py38h0020b31_3" target_registry: "images.viash-hub.com" target_tag: "main" namespace_separator: "/" setup: - type: "docker" run: - - "apt-get update && \\\napt-get install -y --no-install-recommends wget && \\\ - \nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\ - \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\ - \ /usr/local/bin/\n" + - "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" entrypoint: [] cmd: null - type: "native" id: "native" build_info: - config: "src/kallisto/kallisto_quant/config.vsh.yaml" - runner: "executable" + config: "src/umi_tools/umi_tools_prepareforrsem/config.vsh.yaml" + runner: "nextflow" engine: "docker|native" - output: "target/executable/kallisto/kallisto_quant" - executable: "target/executable/kallisto/kallisto_quant/kallisto_quant" + output: "target/nextflow/umi_tools/umi_tools_prepareforrsem" + executable: "target/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55" + git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox" + git_tag: "v0.2.0-26-ga13b57d" package_config: - name: "rnaseq" + name: "biobox" version: "main" - info: - test_resources: - - path: 
"gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" + description: "A collection of bioinformatics tools for working with sequence data.\n" + info: null viash_version: "0.9.0" source: "src" target: "target" config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" + - ".requirements.commands := ['ps']\n" - ".engines += { type: \"native\" }" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_tag := 'main'" + keywords: + - "bioinformatics" + - "modules" + - "sequencing" + license: "MIT" organization: "vsh" + links: + repository: "https://github.com/viash-hub/biobox" + issue_tracker: "https://github.com/viash-hub/biobox/issues" diff --git a/target/nextflow/bbmap_bbsplit/main.nf b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf similarity index 94% rename from target/nextflow/bbmap_bbsplit/main.nf rename to target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf index d4ed73d..2412e04 100644 --- a/target/nextflow/bbmap_bbsplit/main.nf +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf @@ -1,4 +1,4 @@ -// bbmap_bbsplit main +// umi_tools_prepareforrsem main // // This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative // work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data @@ -2804,85 +2804,26 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "bbmap_bbsplit", + "name" : "umi_tools_prepareforrsem", + "namespace" : "umi_tools", "version" : "main", "argument_groups" : [ { "name" : "Input", "arguments" : [ - { - "type" : "string", - "name" : "--id", - "description" : "Sample ID", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "boolean", - "name" : "--paired", - "description" : "Paired fastq files or not?", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "file", "name" : "--input", - "description" : "Input fastq files, either one or two (paired)", + "alternatives" : [ + "-I", + "--stdin" + ], "example" : [ - "sample.fastq" + "$id.transcriptome.bam" ], "must_exist" : true, "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : true, - "multiple_sep" : "," - }, - { - "type" : "file", - "name" : "--primary_ref", - "description" : "Primary reference FASTA", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--bbsplit_fasta_list", - "description" : "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit.", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "boolean", - "name" : "--only_build_index", - "description" : "true = only build index; false = mapping", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--built_bbsplit_index", - "description" : 
"Directory with index files", - "must_exist" : true, - "create_parent" : true, - "required" : false, + "required" : true, "direction" : "input", "multiple" : false, "multiple_sep" : ";" @@ -2894,12 +2835,15 @@ meta = [ "arguments" : [ { "type" : "file", - "name" : "--fastq_1", - "description" : "Output file for read 1.", - "default" : [ - "$id.$key.read_1.fastq" + "name" : "--output", + "alternatives" : [ + "-S", + "--stdout" ], - "must_exist" : false, + "example" : [ + "$id.transcriptome_sorted.bam" + ], + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2908,12 +2852,12 @@ meta = [ }, { "type" : "file", - "name" : "--fastq_2", - "description" : "Output file for read 2.", - "default" : [ - "$id.$key.read_2.fastq" + "name" : "--log", + "alternatives" : [ + "-L" ], - "must_exist" : false, + "description" : "File with logging information [default = stdout].", + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -2922,17 +2866,109 @@ meta = [ }, { "type" : "file", - "name" : "--bbsplit_index", - "description" : "Directory with index files", - "default" : [ - "BBSplit_index" + "name" : "--error", + "alternatives" : [ + "-E" ], - "must_exist" : false, + "description" : "File with error information [default = stderr].", + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", "multiple" : false, "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--log2stderr", + "description" : "Send logging information to stderr [default = False].", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--temp_dir", + "description" : "Directory for temporary files. If not set, the bash environmental variable \nTMPDIR is used.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--compresslevel", + "description" : "Level of Gzip compression to use. 
Default (6) matchesGNU gzip rather than python \ngzip default (which is 9).\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Options", + "arguments" : [ + { + "type" : "string", + "name" : "--tags", + "description" : "Comma-seperated list of tags to transfer from read1 to read2 (Default: 'UG,BX')\n", + "example" : [ + "UG,BX" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--sam", + "description" : "Input and output SAM rather than BAM.", + "direction" : "input" + }, + { + "type" : "string", + "name" : "--timeit", + "description" : "Store timeing information in file [none].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--timeit_name", + "description" : "Name in timing file for this class of jobs [all].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--timeit_header", + "description" : "Add header for timing information [none].", + "direction" : "input" + }, + { + "type" : "integer", + "name" : "--verbose", + "alternatives" : [ + "-v" + ], + "description" : "Loglevel [1]. 
The higher, the more output.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--random_seed", + "description" : "Random seed to initialize number generator with [none].\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" } ] } @@ -2942,9 +2978,13 @@ meta = [ "type" : "bash_script", "path" : "script.sh", "is_executable" : true + }, + { + "type" : "file", + "path" : "prepare-for-rsem.py" } ], - "description" : "Split sequencing reads by mapping them to multiple references simultaneously.\n", + "description" : "Make the output from umi-tools dedup or group compatible with RSEM", "test_resources" : [ { "type" : "bash_script", @@ -2953,55 +2993,32 @@ meta = [ }, { "type" : "file", - "path" : "/testData/minimal_test/reference/genome.fasta" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/reference/bbsplit_fasta/human.fa" + "path" : "test_data" } ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/bbmap/bbsplit/main.nf", - "modules/nf-core/bbmap/bbsplit/meta.yml" - ], - "last_sha" : "277bd337739a8b8f753fa7b5eda6743b9b6acb89" - } - }, "status" : "enabled", "requirements" : { "commands" : [ "ps" ] }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } + "keywords" : [ + "umi_tools", + "rsem", + "bam", + "sam" ], + "license" : "MIT", + "references" : { + "doi" : [ + "10.1101/gr.209601.116" + ] + }, + "links" : { + 
"repository" : "https://github.com/CGATOxford/UMI-tools", + "homepage" : "https://umi-tools.readthedocs.io/en/latest/", + "documentation" : "https://umi-tools.readthedocs.io/en/latest/reference/extract.html" + }, "runners" : [ { "type" : "executable", @@ -3080,7 +3097,7 @@ meta = [ { "type" : "docker", "id" : "docker", - "image" : "ubuntu:22.04", + "image" : "quay.io/biocontainers/umi_tools:1.1.5--py38h0020b31_3", "target_registry" : "images.viash-hub.com", "target_tag" : "main", "namespace_separator" : "/", @@ -3088,7 +3105,7 @@ meta = [ { "type" : "docker", "run" : [ - "apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget tar && \\\\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \\\\\ntar xzf BBMap_39.01.tar.gz && \\\\\ncp -r bbmap/* /usr/local/bin\n" + "umi_tools -v | sed 's/ version//g' > /var/software_versions.txt\n" ] } ] @@ -3099,49 +3116,39 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/bbmap_bbsplit/config.vsh.yaml", + "config" : "/workdir/root/repo/src/umi_tools/umi_tools_prepareforrsem/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/bbmap_bbsplit", + "output" : "target/nextflow/umi_tools/umi_tools_prepareforrsem", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55", + "git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox", + "git_tag" : "v0.2.0-26-ga13b57d" }, "package_config" : { - "name" : "rnaseq", + "name" : "biobox", "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - 
"repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], + "description" : "A collection of bioinformatics tools for working with sequence data.\n", "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", + "source" : "src", + "target" : "target", "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", + ".requirements.commands := ['ps']\n", ".engines += { type: \\"native\\" }", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_tag := 'main'" ], - "organization" : "vsh" + "keywords" : [ + "bioinformatics", + "modules", + "sequencing" + ], + "license" : "MIT", + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/viash-hub/biobox", + "issue_tracker" : "https://github.com/viash-hub/biobox/issues" + } } }''')) ] @@ -3157,16 +3164,20 @@ tempscript=".viash_script.sh" cat > "$tempscript" << VIASHMAIN ## VIASH START # The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PRIMARY_REF+x} ]; then echo "${VIASH_PAR_PRIMARY_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_primary_ref='&'#" ; else echo "# par_primary_ref="; fi ) -$( if [ ! -z ${VIASH_PAR_BBSPLIT_FASTA_LIST+x} ]; then echo "${VIASH_PAR_BBSPLIT_FASTA_LIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbsplit_fasta_list='&'#" ; else echo "# par_bbsplit_fasta_list="; fi ) -$( if [ ! 
-z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi ) -$( if [ ! -z ${VIASH_PAR_BUILT_BBSPLIT_INDEX+x} ]; then echo "${VIASH_PAR_BUILT_BBSPLIT_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_built_bbsplit_index='&'#" ; else echo "# par_built_bbsplit_index="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) -$( if [ ! -z ${VIASH_PAR_BBSPLIT_INDEX+x} ]; then echo "${VIASH_PAR_BBSPLIT_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbsplit_index='&'#" ; else echo "# par_bbsplit_index="; fi ) +$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) +$( if [ ! -z ${VIASH_PAR_ERROR+x} ]; then echo "${VIASH_PAR_ERROR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_error='&'#" ; else echo "# par_error="; fi ) +$( if [ ! -z ${VIASH_PAR_LOG2STDERR+x} ]; then echo "${VIASH_PAR_LOG2STDERR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log2stderr='&'#" ; else echo "# par_log2stderr="; fi ) +$( if [ ! -z ${VIASH_PAR_TEMP_DIR+x} ]; then echo "${VIASH_PAR_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_temp_dir='&'#" ; else echo "# par_temp_dir="; fi ) +$( if [ ! -z ${VIASH_PAR_COMPRESSLEVEL+x} ]; then echo "${VIASH_PAR_COMPRESSLEVEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_compresslevel='&'#" ; else echo "# par_compresslevel="; fi ) +$( if [ ! -z ${VIASH_PAR_TAGS+x} ]; then echo "${VIASH_PAR_TAGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tags='&'#" ; else echo "# par_tags="; fi ) +$( if [ ! 
-z ${VIASH_PAR_SAM+x} ]; then echo "${VIASH_PAR_SAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam='&'#" ; else echo "# par_sam="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT+x} ]; then echo "${VIASH_PAR_TIMEIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit='&'#" ; else echo "# par_timeit="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_NAME+x} ]; then echo "${VIASH_PAR_TIMEIT_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_name='&'#" ; else echo "# par_timeit_name="; fi ) +$( if [ ! -z ${VIASH_PAR_TIMEIT_HEADER+x} ]; then echo "${VIASH_PAR_TIMEIT_HEADER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_timeit_header='&'#" ; else echo "# par_timeit_header="; fi ) +$( if [ ! -z ${VIASH_PAR_VERBOSE+x} ]; then echo "${VIASH_PAR_VERBOSE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_verbose='&'#" ; else echo "# par_verbose="; fi ) +$( if [ ! -z ${VIASH_PAR_RANDOM_SEED+x} ]; then echo "${VIASH_PAR_RANDOM_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_random_seed='&'#" ; else echo "# par_random_seed="; fi ) $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) @@ -3191,67 +3202,32 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" set -eo pipefail -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT +unset_if_false=( + par_sam + par_error + par_log2stderr + par_timeit_header ) -avail_mem=3072 +for var in "\\${unset_if_false[@]}"; do + test_val="\\${!var}" + [[ "\\$test_val" == "false" ]] && unset \\$var +done -if [ ! 
-d "\\$par_built_bbsplit_index" ]; then - other_refs=() - while IFS="," read -r name path - do - other_refs+=("ref_\\$name=\\$path") - done < "\\$par_bbsplit_fasta_list" -fi - -if \\$par_only_build_index; then - if [ -f "\\$par_primary_ref" ] && [ \\${#other_refs[@]} -gt 0 ]; then - bbsplit.sh \\\\ - -Xmx\\${avail_mem}M \\\\ - ref_primary="\\$par_primary_ref" \\${other_refs[@]} \\\\ - path=\\$par_bbsplit_index \\\\ - threads=\\${meta_cpus:-1} - else - echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." - fi -else - IFS="," read -ra input <<< "\\$par_input" - tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX") - index_files='' - if [ -d "\\$par_built_bbsplit_index" ]; then - index_files="path=\\$par_built_bbsplit_index" - elif [ -f "\\$par_primary_ref" ] && [ \\${#other_refs[@]} -gt 0 ]; then - index_files="ref_primary=\\$par_primary_ref \\${other_refs[@]}" - else - echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." 
- fi - if \\$par_paired; then - bbsplit.sh \\\\ - -Xmx\\${avail_mem}M \\\\ - \\$index_files \\\\ - threads=\\${meta_cpus:-1} \\\\ - in=\\${input[0]} \\\\ - in2=\\${input[1]} \\\\ - basename=\\${tmpdir}/%_#.fastq \\\\ - refstats=bbsplit_stats.txt - read1=\\$(find \\$tmpdir/ -iname primary_1*) - read2=\\$(find \\$tmpdir/ -iname primary_2*) - cp \\$read1 \\$par_fastq_1 - cp \\$read2 \\$par_fastq_2 - else - bbsplit.sh \\\\ - -Xmx\\${avail_mem}M \\\\ - \\$index_files \\\\ - threads=\\${meta_cpus:-1} \\\\ - in=\\${input[0]} \\\\ - basename=\\${tmpdir}/%.fastq \\\\ - refstats=bbsplit_stats.txt - read1=\\$(find \\$tmpdir/ -iname primary*) - cp \\$read1 \\$par_fastq_1 - fi -fi +umi_tools prepare-for-rsem \\\\ + \\${par_log:+--log "\\${par_log}"} \\\\ + \\${par_tags:+--tags "\\${par_tags}"} \\\\ + \\${par_sam:+--sam} \\\\ + --stdin="\\${par_input}" \\\\ + \\${par_output:+--stdout "\\${par_output}"} \\\\ + \\${par_error:+--error "\\${par_error}"} \\\\ + \\${par_temp_dir:+--temp-dir "\\${par_temp_dir}"} \\\\ + \\${par_log2stderr:+--log2stderr} \\\\ + \\${par_verbose:+--verbose "\\${par_verbose}"} \\\\ + \\${par_random_seed:+--random-seed "\\${par_random_seed}"} \\\\ + \\${par_compresslevel:+--compresslevel "\\${par_compresslevel}"} + \\${par_timeit:+--timeit "\\${par_timeit}"} \\\\ + \\${par_timeit_name:+--timeit-name "\\${par_timeit_name}"} \\\\ + \\${par_timeit_header:+--timeit-header} VIASHMAIN bash "$tempscript" ''' @@ -3612,7 +3588,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/bbmap_bbsplit", + "image" : "vsh/biobox/umi_tools/umi_tools_prepareforrsem", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/qualimap/nextflow.config b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config similarity index 96% rename from target/nextflow/qualimap/nextflow.config rename to 
target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config index 6c01d8f..7a5f441 100644 --- a/target/nextflow/qualimap/nextflow.config +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow.config @@ -1,9 +1,9 @@ manifest { - name = 'qualimap' + name = 'umi_tools/umi_tools_prepareforrsem' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' - description = 'RNA-seq QC analysis using the qualimap \n' + description = 'Make the output from umi-tools dedup or group compatible with RSEM' } process.container = 'nextflow/bash:latest' diff --git a/target/nextflow/kallisto/kallisto_quant/nextflow_schema.json b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json similarity index 53% rename from target/nextflow/kallisto/kallisto_quant/nextflow_schema.json rename to target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json index 14f0600..75b3c8d 100644 --- a/target/nextflow/kallisto/kallisto_quant/nextflow_schema.json +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/nextflow_schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", -"title": "kallisto_quant", -"description": "Computes equivalence classes for reads and quantifies abundances.\n", +"title": "umi_tools_prepareforrsem", +"description": "Make the output from umi-tools dedup or group compatible with RSEM", "type": "object", "definitions": { @@ -17,78 +17,8 @@ "input": { "type": "string", - "description": "Type: List of `file`, multiple_sep: `\",\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively", - "help_text": "Type: List of `file`, multiple_sep: `\",\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively." 
- - } - - - , - "paired": { - "type": - "boolean", - "description": "Type: `boolean`. Paired reads or not", - "help_text": "Type: `boolean`. Paired reads or not." - - } - - - , - "strandedness": { - "type": - "string", - "description": "Type: `string`. Sample strand-specificity", - "help_text": "Type: `string`. Sample strand-specificity." - - } - - - , - "index": { - "type": - "string", - "description": "Type: `file`. Kallisto genome index", - "help_text": "Type: `file`. Kallisto genome index." - - } - - - , - "gtf": { - "type": - "string", - "description": "Type: `file`. Optional gtf file for translation of transcripts into genomic coordinates", - "help_text": "Type: `file`. Optional gtf file for translation of transcripts into genomic coordinates." - - } - - - , - "chromosomes": { - "type": - "string", - "description": "Type: `file`. Optional tab separated file with chromosome names and lengths", - "help_text": "Type: `file`. Optional tab separated file with chromosome names and lengths." - - } - - - , - "fragment_length": { - "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated average fragment length", - "help_text": "Type: `integer`. For single-end mode only, the estimated average fragment length." - - } - - - , - "fragment_length_sd": { - "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length", - "help_text": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length." + "description": "Type: `file`, required, example: `$id.transcriptome.bam`. ", + "help_text": "Type: `file`, required, example: `$id.transcriptome.bam`. " } @@ -107,10 +37,10 @@ "output": { "type": "string", - "description": "Type: `file`, default: `$id.$key.output.kallisto_quant_results`. Kallisto quant results", - "help_text": "Type: `file`, default: `$id.$key.output.kallisto_quant_results`. 
Kallisto quant results" + "description": "Type: `file`, default: `$id.$key.output.bam`, example: `$id.transcriptome_sorted.bam`. ", + "help_text": "Type: `file`, default: `$id.$key.output.bam`, example: `$id.transcriptome_sorted.bam`. " , - "default":"$id.$key.output.kallisto_quant_results" + "default": "$id.$key.output.bam" } @@ -118,32 +48,134 @@ "log": { "type": "string", - "description": "Type: `file`, default: `$id.$key.log.txt`. File containing log information from running kallisto quant", - "help_text": "Type: `file`, default: `$id.$key.log.txt`. File containing log information from running kallisto quant" + "description": "Type: `file`, default: `$id.$key.log.log`. File with logging information [default = stdout]", + "help_text": "Type: `file`, default: `$id.$key.log.log`. File with logging information [default = stdout]." , - "default":"$id.$key.log.txt" + "default": "$id.$key.log.log" } , - "run_info": { + "error": { "type": "string", - "description": "Type: `file`, default: `$id.$key.run_info.json`. A json file containing information about the run", - "help_text": "Type: `file`, default: `$id.$key.run_info.json`. A json file containing information about the run" + "description": "Type: `file`, default: `$id.$key.error.error`. File with error information [default = stderr]", + "help_text": "Type: `file`, default: `$id.$key.error.error`. File with error information [default = stderr]." , - "default":"$id.$key.run_info.json" + "default": "$id.$key.error.error" } , - "quant_results_file": { + "log2stderr": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Send logging information to stderr [default = False]", + "help_text": "Type: `boolean_true`, default: `false`. Send logging information to stderr [default = False]." + , + "default": "False" + } + + + , + "temp_dir": { "type": "string", - "description": "Type: `file`, default: `$id.$key.quant_results_file.tsv`. 
TSV file containing abundance estimates from Kallisto", - "help_text": "Type: `file`, default: `$id.$key.quant_results_file.tsv`. TSV file containing abundance estimates from Kallisto" + "description": "Type: `string`. Directory for temporary files", + "help_text": "Type: `string`. Directory for temporary files. If not set, the bash environmental variable \nTMPDIR is used.\n" + + } + + + , + "compresslevel": { + "type": + "integer", + "description": "Type: `integer`. Level of Gzip compression to use", + "help_text": "Type: `integer`. Level of Gzip compression to use. Default (6) matchesGNU gzip rather than python \ngzip default (which is 9).\n" + + } + + +} +}, + + + "options" : { + "title": "Options", + "type": "object", + "description": "No description", + "properties": { + + + "tags": { + "type": + "string", + "description": "Type: `string`, example: `UG,BX`. Comma-seperated list of tags to transfer from read1 to read2 (Default: \u0027UG,BX\u0027)\n", + "help_text": "Type: `string`, example: `UG,BX`. Comma-seperated list of tags to transfer from read1 to read2 (Default: \u0027UG,BX\u0027)\n" + + } + + + , + "sam": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. Input and output SAM rather than BAM", + "help_text": "Type: `boolean_true`, default: `false`. Input and output SAM rather than BAM." , - "default":"$id.$key.quant_results_file.tsv" + "default": "False" + } + + + , + "timeit": { + "type": + "string", + "description": "Type: `string`. Store timeing information in file [none]", + "help_text": "Type: `string`. Store timeing information in file [none].\n" + + } + + + , + "timeit_name": { + "type": + "string", + "description": "Type: `string`. Name in timing file for this class of jobs [all]", + "help_text": "Type: `string`. Name in timing file for this class of jobs [all].\n" + + } + + + , + "timeit_header": { + "type": + "boolean", + "description": "Type: `boolean_true`, default: `false`. 
Add header for timing information [none]", + "help_text": "Type: `boolean_true`, default: `false`. Add header for timing information [none]." + , + "default": "False" + } + + + , + "verbose": { + "type": + "integer", + "description": "Type: `integer`. Loglevel [1]", + "help_text": "Type: `integer`. Loglevel [1]. The higher, the more output.\n" + + } + + + , + "random_seed": { + "type": + "integer", + "description": "Type: `integer`. Random seed to initialize number generator with [none]", + "help_text": "Type: `integer`. Random seed to initialize number generator with [none].\n" + } @@ -191,6 +223,10 @@ "$ref": "#/definitions/output" }, + { + "$ref": "#/definitions/options" + }, + { "$ref": "#/definitions/nextflow input-output arguments" } diff --git a/src/umitools_prepareforquant/prepare-for-rsem.py b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py old mode 100755 new mode 100644 similarity index 99% rename from src/umitools_prepareforquant/prepare-for-rsem.py rename to target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py index 59dd01a..b53d30a --- a/src/umitools_prepareforquant/prepare-for-rsem.py +++ b/target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/prepare-for-rsem.py @@ -50,6 +50,7 @@ from collections import defaultdict, Counter import pysam import sys + usage = """ prepare_for_rsem - make output from dedup or group compatible with RSEM diff --git a/target/executable/bbmap_bbsplit/.config.vsh.yaml b/target/executable/bbmap_bbsplit/.config.vsh.yaml deleted file mode 100644 index b07aca9..0000000 --- a/target/executable/bbmap_bbsplit/.config.vsh.yaml +++ /dev/null @@ -1,268 +0,0 @@ -name: "bbmap_bbsplit" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "string" - name: "--id" - description: "Sample ID" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - 
type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: "," - - type: "file" - name: "--primary_ref" - description: "Primary reference FASTA" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--only_build_index" - description: "true = only build index; false = mapping" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--built_bbsplit_index" - description: "Directory with index files" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--fastq_1" - description: "Output file for read 1." - info: null - default: - - "$id.$key.read_1.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastq_2" - description: "Output file for read 2." 
- info: null - default: - - "$id.$key.read_2.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bbsplit_index" - description: "Directory with index files" - info: null - default: - - "BBSplit_index" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Split sequencing reads by mapping them to multiple references simultaneously.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "genome.fasta" -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -- type: "file" - path: "sarscov2.fa" -- type: "file" - path: "human.fa" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/bbmap/bbsplit/main.nf" - - "modules/nf-core/bbmap/bbsplit/meta.yml" - last_sha: "277bd337739a8b8f753fa7b5eda6743b9b6acb89" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" 
- mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ - \ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\ - \ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/bbmap_bbsplit/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/bbmap_bbsplit" - executable: 
"target/executable/bbmap_bbsplit/bbmap_bbsplit" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/bbmap_bbsplit/bbmap_bbsplit b/target/executable/bbmap_bbsplit/bbmap_bbsplit deleted file mode 100755 index 22fb677..0000000 --- a/target/executable/bbmap_bbsplit/bbmap_bbsplit +++ /dev/null @@ -1,1371 +0,0 @@ -#!/usr/bin/env bash - -# bbmap_bbsplit main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="bbmap_bbsplit" -VIASH_META_FUNCTIONALITY_NAME="bbmap_bbsplit" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bbmap_bbsplit main" - echo "" - echo "Split sequencing reads by mapping them to multiple references simultaneously." - echo "" - echo "Input:" - echo " --id" - echo " type: string" - echo " Sample ID" - echo "" - echo " --paired" - echo " type: boolean" - echo " default: false" - echo " Paired fastq files or not?" 
- echo "" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " example: sample.fastq" - echo " Input fastq files, either one or two (paired)" - echo "" - echo " --primary_ref" - echo " type: file, file must exist" - echo " Primary reference FASTA" - echo "" - echo " --bbsplit_fasta_list" - echo " type: file, file must exist" - echo " Path to comma-separated file containing a list of reference genomes to" - echo " filter reads against with BBSplit." - echo "" - echo " --only_build_index" - echo " type: boolean" - echo " true = only build index; false = mapping" - echo "" - echo " --built_bbsplit_index" - echo " type: file, file must exist" - echo " Directory with index files" - echo "" - echo "Output:" - echo " --fastq_1" - echo " type: file, output" - echo " default: \$id.\$key.read_1.fastq" - echo " Output file for read 1." - echo "" - echo " --fastq_2" - echo " type: file, output" - echo " default: \$id.\$key.read_2.fastq" - echo " Output file for read 2." - echo "" - echo " --bbsplit_index" - echo " type: file, output" - echo " default: BBSplit_index" - echo " Directory with index files" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ -apt-get install -y build-essential openjdk-17-jdk wget tar && \ -wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \ -tar xzf BBMap_39.01.tar.gz && \ -cp -r bbmap/* /usr/local/bin - -LABEL org.opencontainers.image.description="Companion 
container for running component bbmap_bbsplit" -LABEL org.opencontainers.image.created="2024-11-27T08:42:31Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bbmap_bbsplit main" - exit - ;; - --id) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id=*) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --primary_ref) - [ -n "$VIASH_PAR_PRIMARY_REF" ] && ViashError Bad arguments for option \'--primary_ref\': \'$VIASH_PAR_PRIMARY_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PRIMARY_REF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --primary_ref. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --primary_ref=*) - [ -n "$VIASH_PAR_PRIMARY_REF" ] && ViashError Bad arguments for option \'--primary_ref=*\': \'$VIASH_PAR_PRIMARY_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PRIMARY_REF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bbsplit_fasta_list) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bbsplit_fasta_list=*) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list=*\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --only_build_index) - [ -n "$VIASH_PAR_ONLY_BUILD_INDEX" ] && ViashError Bad arguments for option \'--only_build_index\': \'$VIASH_PAR_ONLY_BUILD_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ONLY_BUILD_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --only_build_index. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --only_build_index=*) - [ -n "$VIASH_PAR_ONLY_BUILD_INDEX" ] && ViashError Bad arguments for option \'--only_build_index=*\': \'$VIASH_PAR_ONLY_BUILD_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ONLY_BUILD_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --built_bbsplit_index) - [ -n "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--built_bbsplit_index\': \'$VIASH_PAR_BUILT_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BUILT_BBSPLIT_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --built_bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --built_bbsplit_index=*) - [ -n "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--built_bbsplit_index=*\': \'$VIASH_PAR_BUILT_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BUILT_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastq_1) - [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastq_1=*) - [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastq_2) - [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_FASTQ_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastq_2=*) - [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bbsplit_index) - [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_index. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bbsplit_index=*) - [ -n "$VIASH_PAR_BBSPLIT_INDEX" ] && ViashError Bad arguments for option \'--bbsplit_index=*\': \'$VIASH_PAR_BBSPLIT_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/bbmap_bbsplit:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_PAIRED+x} ]; then - VIASH_PAR_PAIRED="false" -fi -if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then - VIASH_PAR_FASTQ_1="\$id.\$key.read_1.fastq" -fi -if [ -z ${VIASH_PAR_FASTQ_2+x} ]; then - VIASH_PAR_FASTQ_2="\$id.\$key.read_2.fastq" -fi -if [ -z ${VIASH_PAR_BBSPLIT_INDEX+x} ]; then - VIASH_PAR_BBSPLIT_INDEX="BBSplit_index" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_PRIMARY_REF" ] && [ ! -e "$VIASH_PAR_PRIMARY_REF" ]; then - ViashError "Input file '$VIASH_PAR_PRIMARY_REF' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && [ ! -e "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - ViashError "Input file '$VIASH_PAR_BBSPLIT_FASTA_LIST' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ]; then - ViashError "Input file '$VIASH_PAR_BUILT_BBSPLIT_INDEX' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ONLY_BUILD_INDEX" ]]; then - if ! 
[[ "$VIASH_PAR_ONLY_BUILD_INDEX" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--only_build_index' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQ_1")" -fi -if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQ_2")" -fi -if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -d "$(dirname "$VIASH_PAR_BBSPLIT_INDEX")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BBSPLIT_INDEX")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_PRIMARY_REF" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_PRIMARY_REF")" ) - VIASH_PAR_PRIMARY_REF=$(ViashDockerAutodetectMount "$VIASH_PAR_PRIMARY_REF") -fi -if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BBSPLIT_FASTA_LIST")" ) - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashDockerAutodetectMount "$VIASH_PAR_BBSPLIT_FASTA_LIST") -fi -if [ ! -z "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BUILT_BBSPLIT_INDEX")" ) - VIASH_PAR_BUILT_BBSPLIT_INDEX=$(ViashDockerAutodetectMount "$VIASH_PAR_BUILT_BBSPLIT_INDEX") -fi -if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_1")" ) - VIASH_PAR_FASTQ_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_1" ) -fi -if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_2")" ) - VIASH_PAR_FASTQ_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_2" ) -fi -if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BBSPLIT_INDEX")" ) - VIASH_PAR_BBSPLIT_INDEX=$(ViashDockerAutodetectMount "$VIASH_PAR_BBSPLIT_INDEX") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BBSPLIT_INDEX" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bbmap_bbsplit-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\"'\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PRIMARY_REF+x} ]; then echo "${VIASH_PAR_PRIMARY_REF}" | sed "s#'#'\"'\"'#g;s#.*#par_primary_ref='&'#" ; else echo "# par_primary_ref="; fi ) -$( if [ ! -z ${VIASH_PAR_BBSPLIT_FASTA_LIST+x} ]; then echo "${VIASH_PAR_BBSPLIT_FASTA_LIST}" | sed "s#'#'\"'\"'#g;s#.*#par_bbsplit_fasta_list='&'#" ; else echo "# par_bbsplit_fasta_list="; fi ) -$( if [ ! -z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi ) -$( if [ ! -z ${VIASH_PAR_BUILT_BBSPLIT_INDEX+x} ]; then echo "${VIASH_PAR_BUILT_BBSPLIT_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_built_bbsplit_index='&'#" ; else echo "# par_built_bbsplit_index="; fi ) -$( if [ ! 
-z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) -$( if [ ! -z ${VIASH_PAR_BBSPLIT_INDEX+x} ]; then echo "${VIASH_PAR_BBSPLIT_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_bbsplit_index='&'#" ; else echo "# par_bbsplit_index="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -avail_mem=3072 - -if [ ! 
-d "\$par_built_bbsplit_index" ]; then - other_refs=() - while IFS="," read -r name path - do - other_refs+=("ref_\$name=\$path") - done < "\$par_bbsplit_fasta_list" -fi - -if \$par_only_build_index; then - if [ -f "\$par_primary_ref" ] && [ \${#other_refs[@]} -gt 0 ]; then - bbsplit.sh \\ - -Xmx\${avail_mem}M \\ - ref_primary="\$par_primary_ref" \${other_refs[@]} \\ - path=\$par_bbsplit_index \\ - threads=\${meta_cpus:-1} - else - echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." - fi -else - IFS="," read -ra input <<< "\$par_input" - tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") - index_files='' - if [ -d "\$par_built_bbsplit_index" ]; then - index_files="path=\$par_built_bbsplit_index" - elif [ -f "\$par_primary_ref" ] && [ \${#other_refs[@]} -gt 0 ]; then - index_files="ref_primary=\$par_primary_ref \${other_refs[@]}" - else - echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." - fi - if \$par_paired; then - bbsplit.sh \\ - -Xmx\${avail_mem}M \\ - \$index_files \\ - threads=\${meta_cpus:-1} \\ - in=\${input[0]} \\ - in2=\${input[1]} \\ - basename=\${tmpdir}/%_#.fastq \\ - refstats=bbsplit_stats.txt - read1=\$(find \$tmpdir/ -iname primary_1*) - read2=\$(find \$tmpdir/ -iname primary_2*) - cp \$read1 \$par_fastq_1 - cp \$read2 \$par_fastq_2 - else - bbsplit.sh \\ - -Xmx\${avail_mem}M \\ - \$index_files \\ - threads=\${meta_cpus:-1} \\ - in=\${input[0]} \\ - basename=\${tmpdir}/%.fastq \\ - refstats=bbsplit_stats.txt - read1=\$(find \$tmpdir/ -iname primary*) - cp \$read1 \$par_fastq_1 - fi -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_PRIMARY_REF" ]; then - VIASH_PAR_PRIMARY_REF=$(ViashDockerStripAutomount "$VIASH_PAR_PRIMARY_REF") - fi - if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashDockerStripAutomount "$VIASH_PAR_BBSPLIT_FASTA_LIST") - fi - if [ ! -z "$VIASH_PAR_BUILT_BBSPLIT_INDEX" ]; then - VIASH_PAR_BUILT_BBSPLIT_INDEX=$(ViashDockerStripAutomount "$VIASH_PAR_BUILT_BBSPLIT_INDEX") - fi - if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then - VIASH_PAR_FASTQ_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_1") - fi - if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then - VIASH_PAR_FASTQ_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_2") - fi - if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ]; then - VIASH_PAR_BBSPLIT_INDEX=$(ViashDockerStripAutomount "$VIASH_PAR_BBSPLIT_INDEX") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -exit 0 diff --git a/target/executable/bedtools_genomecov/.config.vsh.yaml b/target/executable/bedtools_genomecov/.config.vsh.yaml index b3e6899..ef4c58c 100644 --- a/target/executable/bedtools_genomecov/.config.vsh.yaml +++ b/target/executable/bedtools_genomecov/.config.vsh.yaml @@ -83,7 +83,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -178,8 +178,8 @@ build_info: output: "target/executable/bedtools_genomecov" executable: "target/executable/bedtools_genomecov/bedtools_genomecov" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -190,7 +190,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/bedtools_genomecov/bedtools_genomecov b/target/executable/bedtools_genomecov/bedtools_genomecov index 4997838..822fdfc 100755 --- a/target/executable/bedtools_genomecov/bedtools_genomecov +++ b/target/executable/bedtools_genomecov/bedtools_genomecov @@ -481,9 +481,9 @@ mv bedtools.static /usr/local/bin/bedtools && \ chmod a+x /usr/local/bin/bedtools LABEL org.opencontainers.image.description="Companion container for running component bedtools_genomecov" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL 
org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:51Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/cat_additional_fasta/.config.vsh.yaml b/target/executable/cat_additional_fasta/.config.vsh.yaml index 07ba9ba..634b402 100644 --- a/target/executable/cat_additional_fasta/.config.vsh.yaml +++ b/target/executable/cat_additional_fasta/.config.vsh.yaml @@ -93,7 +93,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -182,8 +182,8 @@ build_info: output: "target/executable/cat_additional_fasta" executable: "target/executable/cat_additional_fasta/cat_additional_fasta" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -194,7 +194,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/cat_additional_fasta/cat_additional_fasta b/target/executable/cat_additional_fasta/cat_additional_fasta index 7d6c085..11b0fb6 100755 --- a/target/executable/cat_additional_fasta/cat_additional_fasta +++ b/target/executable/cat_additional_fasta/cat_additional_fasta @@ -480,9 +480,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion 
container for running component cat_additional_fasta" -LABEL org.opencontainers.image.created="2024-11-27T08:42:28Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/cat_fastq/.config.vsh.yaml b/target/executable/cat_fastq/.config.vsh.yaml index b285595..2beaa6b 100644 --- a/target/executable/cat_fastq/.config.vsh.yaml +++ b/target/executable/cat_fastq/.config.vsh.yaml @@ -80,7 +80,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -169,8 +169,8 @@ build_info: output: "target/executable/cat_fastq" executable: "target/executable/cat_fastq/cat_fastq" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -181,7 +181,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/cat_fastq/cat_fastq b/target/executable/cat_fastq/cat_fastq index 726cb72..b401291 100755 --- a/target/executable/cat_fastq/cat_fastq +++ b/target/executable/cat_fastq/cat_fastq @@ -472,9 +472,9 @@ function 
ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component cat_fastq" -LABEL org.opencontainers.image.created="2024-11-27T08:42:27Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/deseq2_qc/.config.vsh.yaml b/target/executable/deseq2_qc/.config.vsh.yaml index 68bc8df..e994690 100644 --- a/target/executable/deseq2_qc/.config.vsh.yaml +++ b/target/executable/deseq2_qc/.config.vsh.yaml @@ -136,7 +136,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -237,8 +237,8 @@ build_info: output: "target/executable/deseq2_qc" executable: "target/executable/deseq2_qc/deseq2_qc" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -249,7 +249,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/deseq2_qc/deseq2_qc b/target/executable/deseq2_qc/deseq2_qc index 32190e1..c868da1 100755 --- 
a/target/executable/deseq2_qc/deseq2_qc +++ b/target/executable/deseq2_qc/deseq2_qc @@ -506,9 +506,9 @@ RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packag Rscript -e 'remotes::install_cran(c("optparse", "ggplot2", "RColorBrewer", "pheatmap", "stringr", "matrixStats"), repos = "https://cran.rstudio.com")' LABEL org.opencontainers.image.description="Companion container for running component deseq2_qc" -LABEL org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/dupradar/.config.vsh.yaml b/target/executable/dupradar/.config.vsh.yaml index 2c983c6..203f594 100644 --- a/target/executable/dupradar/.config.vsh.yaml +++ b/target/executable/dupradar/.config.vsh.yaml @@ -168,7 +168,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -266,8 +266,8 @@ build_info: output: "target/executable/dupradar" executable: "target/executable/dupradar/dupradar" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -278,7 +278,7 @@ package_config: 
repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/dupradar/dupradar b/target/executable/dupradar/dupradar index 678ff50..39a8f1b 100755 --- a/target/executable/dupradar/dupradar +++ b/target/executable/dupradar/dupradar @@ -520,9 +520,9 @@ RUN Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.pa Rscript -e 'if (!requireNamespace("dupRadar", quietly = TRUE)) BiocManager::install("dupRadar")' LABEL org.opencontainers.image.description="Companion container for running component dupradar" -LABEL org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:51Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/fastqc/.config.vsh.yaml b/target/executable/fastqc/.config.vsh.yaml deleted file mode 100644 index ca9e1b9..0000000 --- a/target/executable/fastqc/.config.vsh.yaml +++ /dev/null @@ -1,228 +0,0 @@ -name: "fastqc" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," -- name: "Output" - arguments: - - type: "file" - name: "--fastqc_html_1" - description: "FastQC HTML report for read 1." - info: null - default: - - "$id.read_1.fastqc.html" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_html_2" - description: "FastQC HTML report for read 2." - info: null - default: - - "$id.read_2.fastqc.html" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_zip_1" - description: "FastQC report archive for read 1." - info: null - default: - - "$id.read_1.fastqc.zip" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_zip_2" - description: "FastQC report archive for read 2." 
- info: null - default: - - "$id.read_2.fastqc.zip" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ - \ This component can take one or more files (by means of shell globbing) or a complete\ - \ directory.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/fastqc/main.nf" - - "modules/nf-core/fastqc/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 
500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "fastqc" - interactive: false - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/fastqc/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/fastqc" - executable: "target/executable/fastqc/fastqc" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" 
- viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/fastqc/fastqc b/target/executable/fastqc/fastqc deleted file mode 100755 index f72f353..0000000 --- a/target/executable/fastqc/fastqc +++ /dev/null @@ -1,1273 +0,0 @@ -#!/usr/bin/env bash - -# fastqc main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="fastqc" -VIASH_META_FUNCTIONALITY_NAME="fastqc" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "fastqc main" - echo "" - echo "Fastqc component, please see" - echo "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can" - echo "take one or more files (by means of shell globbing) or a complete directory." - echo "" - echo "Input:" - echo " --paired" - echo " type: boolean" - echo " default: false" - echo " Paired fastq files or not?" 
- echo "" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample.fastq" - echo " Input fastq files, either one or two (paired)" - echo "" - echo "Output:" - echo " --fastqc_html_1" - echo " type: file, output, file must exist" - echo " default: \$id.read_1.fastqc.html" - echo " FastQC HTML report for read 1." - echo "" - echo " --fastqc_html_2" - echo " type: file, output" - echo " default: \$id.read_2.fastqc.html" - echo " FastQC HTML report for read 2." - echo "" - echo " --fastqc_zip_1" - echo " type: file, output, file must exist" - echo " default: \$id.read_1.fastqc.zip" - echo " FastQC report archive for read 1." - echo "" - echo " --fastqc_zip_2" - echo " type: file, output" - echo " default: \$id.read_2.fastqc.zip" - echo " FastQC report archive for read 2." -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. 
Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id 
$(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y fastqc && \ - rm -rf /var/lib/apt/lists/* - -LABEL org.opencontainers.image.description="Companion container for running component fastqc" -LABEL org.opencontainers.image.created="2024-11-27T08:42:33Z" -LABEL 
org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "fastqc main" - exit - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --fastqc_html_1) - [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_HTML_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_1. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --fastqc_html_1=*) - [ -n "$VIASH_PAR_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--fastqc_html_1=*\': \'$VIASH_PAR_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_HTML_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastqc_html_2) - [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_HTML_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_html_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastqc_html_2=*) - [ -n "$VIASH_PAR_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--fastqc_html_2=*\': \'$VIASH_PAR_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_HTML_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastqc_zip_1) - [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_ZIP_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastqc_zip_1=*) - [ -n "$VIASH_PAR_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--fastqc_zip_1=*\': \'$VIASH_PAR_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_ZIP_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastqc_zip_2) - [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_FASTQC_ZIP_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_zip_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastqc_zip_2=*) - [ -n "$VIASH_PAR_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--fastqc_zip_2=*\': \'$VIASH_PAR_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_ZIP_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/fastqc:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_PAIRED+x} ]; then - VIASH_PAR_PAIRED="false" -fi -if [ -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then - VIASH_PAR_FASTQC_HTML_1="\$id.read_1.fastqc.html" -fi -if [ -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then - VIASH_PAR_FASTQC_HTML_2="\$id.read_2.fastqc.html" -fi -if [ -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then - VIASH_PAR_FASTQC_ZIP_1="\$id.read_1.fastqc.zip" -fi -if [ -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then - VIASH_PAR_FASTQC_ZIP_2="\$id.read_2.fastqc.zip" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_1")" -fi -if [ ! -z "$VIASH_PAR_FASTQC_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQC_HTML_2")" -fi -if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_1")" -fi -if [ ! -z "$VIASH_PAR_FASTQC_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQC_ZIP_2")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQC_HTML_1")" ) - VIASH_PAR_FASTQC_HTML_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQC_HTML_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQC_HTML_1" ) -fi -if [ ! 
-z "$VIASH_PAR_FASTQC_HTML_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQC_HTML_2")" ) - VIASH_PAR_FASTQC_HTML_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQC_HTML_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQC_HTML_2" ) -fi -if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQC_ZIP_1")" ) - VIASH_PAR_FASTQC_ZIP_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQC_ZIP_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQC_ZIP_1" ) -fi -if [ ! -z "$VIASH_PAR_FASTQC_ZIP_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQC_ZIP_2")" ) - VIASH_PAR_FASTQC_ZIP_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQC_ZIP_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQC_ZIP_2" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-fastqc-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then echo "${VIASH_PAR_FASTQC_HTML_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_html_1='&'#" ; else echo "# par_fastqc_html_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then echo "${VIASH_PAR_FASTQC_HTML_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_html_2='&'#" ; else echo "# par_fastqc_html_2="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then echo "${VIASH_PAR_FASTQC_ZIP_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_zip_1='&'#" ; else echo "# par_fastqc_zip_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then echo "${VIASH_PAR_FASTQC_ZIP_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_zip_2='&'#" ; else echo "# par_fastqc_zip_2="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_name-XXXXXXXX") - -IFS="," read -ra input <<< \$par_input -count=\${#input[@]} - -if \$par_paired; then - echo "Paired - \$count" - if [ \$count -ne 2 ]; then - echo "Paired end input requires two files" - exit 1 - fi -else - echo "Not Paired - \$count" - if [ \$count -ne 1 ]; then - echo "Single end input requires one file" - exit 1 - fi -fi - -fastqc -o \$tmpdir \${input[*]} - -file1=\$(basename -- "\${input[0]}") -read1="\${file1%.fastq*}" -[[ -e "\${tmpdir}/\${read1}_fastqc.html" ]] && cp "\${tmpdir}/\${read1}_fastqc.html" \$par_fastqc_html_1 -[[ -e "\${tmpdir}/\${read1}_fastqc.zip" ]] && cp "\${tmpdir}/\${read1}_fastqc.zip" \$par_fastqc_zip_1 - -if \$par_paired; then - file2=\$(basename -- "\${input[1]}") - read2="\${file2%.fastq*}" - [[ -e "\${tmpdir}/\${read2}_fastqc.html" ]] && cp "\${tmpdir}/\${read2}_fastqc.html" \$par_fastqc_html_2 - [[ -e "\${tmpdir}/\${read2}_fastqc.zip" ]] && cp "\${tmpdir}/\${read2}_fastqc.zip" \$par_fastqc_zip_2 -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ]; then - VIASH_PAR_FASTQC_HTML_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQC_HTML_1") - fi - if [ ! 
-z "$VIASH_PAR_FASTQC_HTML_2" ]; then - VIASH_PAR_FASTQC_HTML_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQC_HTML_2") - fi - if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ]; then - VIASH_PAR_FASTQC_ZIP_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQC_ZIP_1") - fi - if [ ! -z "$VIASH_PAR_FASTQC_ZIP_2" ]; then - VIASH_PAR_FASTQC_ZIP_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQC_ZIP_2") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_FASTQC_HTML_1" ] && [ ! -e "$VIASH_PAR_FASTQC_HTML_1" ]; then - ViashError "Output file '$VIASH_PAR_FASTQC_HTML_1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_FASTQC_ZIP_1" ] && [ ! -e "$VIASH_PAR_FASTQC_ZIP_1" ]; then - ViashError "Output file '$VIASH_PAR_FASTQC_ZIP_1' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/fq_subsample/.config.vsh.yaml b/target/executable/fq_subsample/.config.vsh.yaml deleted file mode 100644 index 46ca2d1..0000000 --- a/target/executable/fq_subsample/.config.vsh.yaml +++ /dev/null @@ -1,207 +0,0 @@ -name: "fq_subsample" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Input fastq files to subsample" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - - type: "string" - name: "--extra_args" - description: "Extra arguments to pass to fq subsample" - info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Input" - arguments: - - type: "file" - name: "--output_1" - description: "Sampled read 1 fastq files" - info: null - default: - - "$id.read_1.subsampled.fastq" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_2" - description: "Sampled read 2 fastq files" - info: null - default: - - "$id.read_2.subsampled.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "fq subsample outputs a subset of records from single or paired FASTQ\ - \ files. 
This requires a seed (--seed) to be set in ext.args\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/fq/subsample/main.nf" - - "modules/nf-core/fq/subsample/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: 
"memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\ - \ && \\\napt-get update && \\\napt-get install -y --no-install-recommends build-essential\ - \ git-all curl && \\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\n.\ - \ \"$HOME/.cargo/env\" && \\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git\ - \ && \\\nmv fq /usr/local/ && cd /usr/local/fq && \\\ncargo install --locked\ - \ --path . 
&& \\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n" - env: - - "TZ=Europe/Brussels" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/fq_subsample/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/fq_subsample" - executable: "target/executable/fq_subsample/fq_subsample" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/fq_subsample/fq_subsample b/target/executable/fq_subsample/fq_subsample deleted file mode 100755 index b1ef5cc..0000000 --- a/target/executable/fq_subsample/fq_subsample +++ /dev/null @@ -1,1184 +0,0 @@ -#!/usr/bin/env bash - -# fq_subsample main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="fq_subsample" -VIASH_META_FUNCTIONALITY_NAME="fq_subsample" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "fq_subsample main" - echo "" - echo "fq subsample outputs a subset of records from single or paired FASTQ files. 
This" - echo "requires a seed (--seed) to be set in ext.args" - echo "" - echo "Input:" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " Input fastq files to subsample" - echo "" - echo " --extra_args" - echo " type: string" - echo " default:" - echo " Extra arguments to pass to fq subsample" - echo "" - echo "Input:" - echo " --output_1" - echo " type: file, output, file must exist" - echo " default: \$id.read_1.subsampled.fastq" - echo " Sampled read 1 fastq files" - echo "" - echo " --output_2" - echo " type: file, output" - echo " default: \$id.read_2.subsampled.fastq" - echo " Sampled read 2 fastq files" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id 
$(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -ENV TZ=Europe/Brussels -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ -apt-get update && \ -apt-get install -y --no-install-recommends build-essential git-all curl && \ -curl https://sh.rustup.rs -sSf | sh -s -- -y && \ -. 
"$HOME/.cargo/env" && \ -git clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \ -mv fq /usr/local/ && cd /usr/local/fq && \ -cargo install --locked --path . && \ -mv /usr/local/fq/target/release/fq /usr/local/bin/ - -LABEL org.opencontainers.image.description="Companion container for running component fq_subsample" -LABEL org.opencontainers.image.created="2024-11-27T08:42:32Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "fq_subsample main" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --extra_args) - [ -n "$VIASH_PAR_EXTRA_ARGS" ] && ViashError Bad arguments for option \'--extra_args\': \'$VIASH_PAR_EXTRA_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_args=*) - [ -n "$VIASH_PAR_EXTRA_ARGS" ] && ViashError Bad arguments for option \'--extra_args=*\': \'$VIASH_PAR_EXTRA_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_1) - [ -n "$VIASH_PAR_OUTPUT_1" ] && ViashError Bad arguments for option \'--output_1\': \'$VIASH_PAR_OUTPUT_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_1. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_1=*) - [ -n "$VIASH_PAR_OUTPUT_1" ] && ViashError Bad arguments for option \'--output_1=*\': \'$VIASH_PAR_OUTPUT_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_2) - [ -n "$VIASH_PAR_OUTPUT_2" ] && ViashError Bad arguments for option \'--output_2\': \'$VIASH_PAR_OUTPUT_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_2=*) - [ -n "$VIASH_PAR_OUTPUT_2" ] && ViashError Bad arguments for option \'--output_2=*\': \'$VIASH_PAR_OUTPUT_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/fq_subsample:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then - VIASH_PAR_EXTRA_ARGS="" -fi -if [ -z ${VIASH_PAR_OUTPUT_1+x} ]; then - VIASH_PAR_OUTPUT_1="\$id.read_1.subsampled.fastq" -fi -if [ -z ${VIASH_PAR_OUTPUT_2+x} ]; then - VIASH_PAR_OUTPUT_2="\$id.read_2.subsampled.fastq" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_1" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_1")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_2" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_2")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_1")" ) - VIASH_PAR_OUTPUT_1=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_1" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_2")" ) - VIASH_PAR_OUTPUT_2=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_2" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-fq_subsample-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\"'\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\"'\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -IFS=";" read -ra input <<< \$par_input -n_fastq=\${#input[@]} - -required_args=("-p" "--probability" "-n" "--read-count") -for arg in "\${required_args[@]}"; do - if [[ "\$par_extra_args" == *"\$arg"* ]]; then - echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args" - exit 1 - fi -done - -if [ \$n_fastq -eq 1 ]; then - fq subsample \$par_extra_args \${input[*]} --r1-dst \$par_output_1 -elif [ \$n_fastq -eq 2 ]; then - fq subsample \$par_extra_args \${input[*]} --r1-dst \$par_output_1 --r2-dst \$par_output_2 -else - echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!" - exit 1 -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_OUTPUT_1" ]; then - VIASH_PAR_OUTPUT_1=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_1") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_2" ]; then - VIASH_PAR_OUTPUT_2=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_2") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_1" ] && [ ! -e "$VIASH_PAR_OUTPUT_1" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_1' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/executable/getchromsizes/.config.vsh.yaml b/target/executable/getchromsizes/.config.vsh.yaml index b1549ed..61d12e7 100644 --- a/target/executable/getchromsizes/.config.vsh.yaml +++ b/target/executable/getchromsizes/.config.vsh.yaml @@ -70,7 +70,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -167,8 +167,8 @@ build_info: output: "target/executable/getchromsizes" executable: "target/executable/getchromsizes/getchromsizes" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -179,7 +179,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/getchromsizes/getchromsizes b/target/executable/getchromsizes/getchromsizes index dffcb7f..c4dd3c7 100755 --- a/target/executable/getchromsizes/getchromsizes +++ b/target/executable/getchromsizes/getchromsizes @@ -480,9 +480,9 @@ make && \ make install LABEL org.opencontainers.image.description="Companion container for running component getchromsizes" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL 
org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/gtf2bed/.config.vsh.yaml b/target/executable/gtf2bed/.config.vsh.yaml index 835a821..70e8fd1 100644 --- a/target/executable/gtf2bed/.config.vsh.yaml +++ b/target/executable/gtf2bed/.config.vsh.yaml @@ -51,7 +51,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -145,8 +145,8 @@ build_info: output: "target/executable/gtf2bed" executable: "target/executable/gtf2bed/gtf2bed" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -157,7 +157,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/gtf2bed/gtf2bed b/target/executable/gtf2bed/gtf2bed index b163db2..bd66c54 100755 --- a/target/executable/gtf2bed/gtf2bed +++ b/target/executable/gtf2bed/gtf2bed @@ -466,9 +466,9 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component gtf2bed" -LABEL 
org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:51Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/gtf_filter/.config.vsh.yaml b/target/executable/gtf_filter/.config.vsh.yaml index 562a7dc..baf51cc 100644 --- a/target/executable/gtf_filter/.config.vsh.yaml +++ b/target/executable/gtf_filter/.config.vsh.yaml @@ -66,7 +66,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -155,8 +155,8 @@ build_info: output: "target/executable/gtf_filter" executable: "target/executable/gtf_filter/gtf_filter" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -167,7 +167,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/gtf_filter/gtf_filter b/target/executable/gtf_filter/gtf_filter index b97b9b2..bba566c 100755 --- a/target/executable/gtf_filter/gtf_filter +++ b/target/executable/gtf_filter/gtf_filter @@ -470,9 +470,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL 
org.opencontainers.image.description="Companion container for running component gtf_filter" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/gunzip/.config.vsh.yaml b/target/executable/gunzip/.config.vsh.yaml index 1cff7b4..b82306b 100644 --- a/target/executable/gunzip/.config.vsh.yaml +++ b/target/executable/gunzip/.config.vsh.yaml @@ -50,7 +50,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -144,8 +144,8 @@ build_info: output: "target/executable/gunzip" executable: "target/executable/gunzip/gunzip" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -156,7 +156,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/gunzip/gunzip b/target/executable/gunzip/gunzip index f07b83e..df2feb3 100755 --- a/target/executable/gunzip/gunzip +++ b/target/executable/gunzip/gunzip @@ -466,9 +466,9 @@ RUN apt-get update && \ rm -rf 
/var/lib/apt/lists/* LABEL org.opencontainers.image.description="Companion container for running component gunzip" -LABEL org.opencontainers.image.created="2024-11-27T08:42:24Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:46Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/kallisto/kallisto_index/.config.vsh.yaml b/target/executable/kallisto/kallisto_index/.config.vsh.yaml deleted file mode 100644 index 2914711..0000000 --- a/target/executable/kallisto/kallisto_index/.config.vsh.yaml +++ /dev/null @@ -1,185 +0,0 @@ -name: "kallisto_index" -namespace: "kallisto" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--transcriptome_fasta" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--pseudo_aligner_kmer_size" - description: "Kmer length passed to indexing step of pseudoaligners." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--kallisto_index" - info: null - default: - - "Kallisto_index" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Create Kallisto index.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "transcriptome.fasta" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/kallisto/index/main.nf" - - "modules/nf-core/kallisto/index/meta.yml" - last_sha: "c0816976384d5e7ee6079c29c45958df1ffa0ee4" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 
1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "apt-get update && \\\napt-get install -y --no-install-recommends wget && \\\ - \nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\ - \ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\ - \ /usr/local/bin/\n" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/kallisto/kallisto_index/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/kallisto/kallisto_index" - executable: "target/executable/kallisto/kallisto_index/kallisto_index" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" 
-package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/kallisto/kallisto_index/kallisto_index b/target/executable/kallisto/kallisto_index/kallisto_index deleted file mode 100755 index c1530cc..0000000 --- a/target/executable/kallisto/kallisto_index/kallisto_index +++ /dev/null @@ -1,1101 +0,0 @@ -#!/usr/bin/env bash - -# kallisto_index main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="kallisto_index" -VIASH_META_FUNCTIONALITY_NAME="kallisto_index" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "kallisto_index main" - echo "" - echo "Create Kallisto index." - echo "" - echo "Input:" - echo " --transcriptome_fasta" - echo " type: file, file must exist" - echo "" - echo " --pseudo_aligner_kmer_size" - echo " type: integer" - echo " Kmer length passed to indexing step of pseudoaligners." 
- echo "" - echo "Output:" - echo " --kallisto_index" - echo " type: file, output, file must exist" - echo " default: Kallisto_index" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? 
# returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. 
-# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ -apt-get install -y --no-install-recommends wget && \ -wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \ -tar -xzf kallisto_linux-v0.50.1.tar.gz && \ -mv kallisto/kallisto /usr/local/bin/ - -LABEL 
org.opencontainers.image.description="Companion container for running component kallisto kallisto_index" -LABEL org.opencontainers.image.created="2024-11-27T08:42:25Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "kallisto_index main" - exit - ;; - --transcriptome_fasta) - [ -n "$VIASH_PAR_TRANSCRIPTOME_FASTA" ] && ViashError Bad arguments for option \'--transcriptome_fasta\': \'$VIASH_PAR_TRANSCRIPTOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_FASTA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --transcriptome_fasta. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --transcriptome_fasta=*) - [ -n "$VIASH_PAR_TRANSCRIPTOME_FASTA" ] && ViashError Bad arguments for option \'--transcriptome_fasta=*\': \'$VIASH_PAR_TRANSCRIPTOME_FASTA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRANSCRIPTOME_FASTA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --pseudo_aligner_kmer_size) - [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --pseudo_aligner_kmer_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --pseudo_aligner_kmer_size=*) - [ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ] && ViashError Bad arguments for option \'--pseudo_aligner_kmer_size=*\': \'$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --kallisto_index) - [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KALLISTO_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --kallisto_index. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --kallisto_index=*) - [ -n "$VIASH_PAR_KALLISTO_INDEX" ] && ViashError Bad arguments for option \'--kallisto_index=*\': \'$VIASH_PAR_KALLISTO_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KALLISTO_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/kallisto/kallisto_index:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_KALLISTO_INDEX+x} ]; then - VIASH_PAR_KALLISTO_INDEX="Kallisto_index" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_FASTA" ] && [ ! -e "$VIASH_PAR_TRANSCRIPTOME_FASTA" ]; then - ViashError "Input file '$VIASH_PAR_TRANSCRIPTOME_FASTA' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ]]; then - if ! [[ "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--pseudo_aligner_kmer_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! 
-d "$(dirname "$VIASH_PAR_KALLISTO_INDEX")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_KALLISTO_INDEX")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_TRANSCRIPTOME_FASTA" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRANSCRIPTOME_FASTA")" ) - VIASH_PAR_TRANSCRIPTOME_FASTA=$(ViashDockerAutodetectMount "$VIASH_PAR_TRANSCRIPTOME_FASTA") -fi -if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_KALLISTO_INDEX")" ) - VIASH_PAR_KALLISTO_INDEX=$(ViashDockerAutodetectMount "$VIASH_PAR_KALLISTO_INDEX") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_KALLISTO_INDEX" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-kallisto_index-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_FASTA}" | sed "s#'#'\"'\"'#g;s#.*#par_transcriptome_fasta='&'#" ; else echo "# par_transcriptome_fasta="; fi ) -$( if [ ! 
-z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_pseudo_aligner_kmer_size='&'#" ; else echo "# par_pseudo_aligner_kmer_size="; fi ) -$( if [ ! -z ${VIASH_PAR_KALLISTO_INDEX+x} ]; then echo "${VIASH_PAR_KALLISTO_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_kallisto_index='&'#" ; else echo "# par_kallisto_index="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -kallisto index \\ - \${par_pseudo_aligner_kmer_size:+-k \$par_pseudo_aligner_kmer_size} \\ - -i \$par_kallisto_index \\ - \$par_transcriptome_fasta -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! 
-z "$VIASH_PAR_TRANSCRIPTOME_FASTA" ]; then - VIASH_PAR_TRANSCRIPTOME_FASTA=$(ViashDockerStripAutomount "$VIASH_PAR_TRANSCRIPTOME_FASTA") - fi - if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ]; then - VIASH_PAR_KALLISTO_INDEX=$(ViashDockerStripAutomount "$VIASH_PAR_KALLISTO_INDEX") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX" ]; then - ViashError "Output file '$VIASH_PAR_KALLISTO_INDEX' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/executable/kallisto/kallisto_quant/kallisto_quant b/target/executable/kallisto/kallisto_quant/kallisto_quant deleted file mode 100755 index d77ea84..0000000 --- a/target/executable/kallisto/kallisto_quant/kallisto_quant +++ /dev/null @@ -1,1415 +0,0 @@ -#!/usr/bin/env bash - -# kallisto_quant main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="kallisto_quant" -VIASH_META_FUNCTIONALITY_NAME="kallisto_quant" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "kallisto_quant main" - echo "" - echo "Computes equivalence classes for reads and quantifies abundances." - echo "" - echo "Input:" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " List of input FastQ files of size 1 and 2 for single-end and paired-end" - echo " data, respectively." - echo "" - echo " --paired" - echo " type: boolean" - echo " Paired reads or not." - echo "" - echo " --strandedness" - echo " type: string" - echo " Sample strand-specificity." - echo "" - echo " --index" - echo " type: file, file must exist" - echo " Kallisto genome index." 
- echo "" - echo " --gtf" - echo " type: file, file must exist" - echo " Optional gtf file for translation of transcripts into genomic" - echo " coordinates." - echo "" - echo " --chromosomes" - echo " type: file, file must exist" - echo " Optional tab separated file with chromosome names and lengths." - echo "" - echo " --fragment_length" - echo " type: integer" - echo " For single-end mode only, the estimated average fragment length." - echo "" - echo " --fragment_length_sd" - echo " type: integer" - echo " For single-end mode only, the estimated standard deviation of the" - echo " fragment length." - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.kallisto_quant_results" - echo " Kallisto quant results" - echo "" - echo " --log" - echo " type: file, output, file must exist" - echo " default: \$id.kallisto_quant.log.txt" - echo " File containing log information from running kallisto quant" - echo "" - echo " --run_info" - echo " type: file, output, file must exist" - echo " default: \$id.run_info.json" - echo " A json file containing information about the run" - echo "" - echo " --quant_results_file" - echo " type: file, output, file must exist" - echo " default: \$id.abundance.tsv" - echo " TSV file containing abundance estimates from Kallisto" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ -apt-get install -y --no-install-recommends wget && \ -wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \ -tar -xzf kallisto_linux-v0.50.1.tar.gz && \ -mv kallisto/kallisto /usr/local/bin/ - -LABEL 
org.opencontainers.image.description="Companion container for running component kallisto kallisto_quant" -LABEL org.opencontainers.image.created="2024-11-27T08:42:25Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "kallisto_quant main" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --strandedness) - [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDEDNESS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --strandedness=*) - [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --index) - [ -n "$VIASH_PAR_INDEX" ] && ViashError Bad arguments for option \'--index\': \'$VIASH_PAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --index. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --index=*) - [ -n "$VIASH_PAR_INDEX" ] && ViashError Bad arguments for option \'--index=*\': \'$VIASH_PAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gtf) - [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gtf=*) - [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --chromosomes) - [ -n "$VIASH_PAR_CHROMOSOMES" ] && ViashError Bad arguments for option \'--chromosomes\': \'$VIASH_PAR_CHROMOSOMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHROMOSOMES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --chromosomes. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --chromosomes=*) - [ -n "$VIASH_PAR_CHROMOSOMES" ] && ViashError Bad arguments for option \'--chromosomes=*\': \'$VIASH_PAR_CHROMOSOMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CHROMOSOMES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fragment_length) - [ -n "$VIASH_PAR_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--fragment_length\': \'$VIASH_PAR_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FRAGMENT_LENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fragment_length. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fragment_length=*) - [ -n "$VIASH_PAR_FRAGMENT_LENGTH" ] && ViashError Bad arguments for option \'--fragment_length=*\': \'$VIASH_PAR_FRAGMENT_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FRAGMENT_LENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fragment_length_sd) - [ -n "$VIASH_PAR_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--fragment_length_sd\': \'$VIASH_PAR_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FRAGMENT_LENGTH_SD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fragment_length_sd. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fragment_length_sd=*) - [ -n "$VIASH_PAR_FRAGMENT_LENGTH_SD" ] && ViashError Bad arguments for option \'--fragment_length_sd=*\': \'$VIASH_PAR_FRAGMENT_LENGTH_SD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FRAGMENT_LENGTH_SD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --log) - [ -n "$VIASH_PAR_LOG" ] && ViashError Bad arguments for option \'--log\': \'$VIASH_PAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --log. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --log=*) - [ -n "$VIASH_PAR_LOG" ] && ViashError Bad arguments for option \'--log=*\': \'$VIASH_PAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --run_info) - [ -n "$VIASH_PAR_RUN_INFO" ] && ViashError Bad arguments for option \'--run_info\': \'$VIASH_PAR_RUN_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUN_INFO="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --run_info. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --run_info=*) - [ -n "$VIASH_PAR_RUN_INFO" ] && ViashError Bad arguments for option \'--run_info=*\': \'$VIASH_PAR_RUN_INFO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RUN_INFO=$(ViashRemoveFlags "$1") - shift 1 - ;; - --quant_results_file) - [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_QUANT_RESULTS_FILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quant_results_file. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quant_results_file=*) - [ -n "$VIASH_PAR_QUANT_RESULTS_FILE" ] && ViashError Bad arguments for option \'--quant_results_file=*\': \'$VIASH_PAR_QUANT_RESULTS_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUANT_RESULTS_FILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/kallisto/kallisto_quant:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="\$id.kallisto_quant_results" -fi -if [ -z ${VIASH_PAR_LOG+x} ]; then - VIASH_PAR_LOG="\$id.kallisto_quant.log.txt" -fi -if [ -z ${VIASH_PAR_RUN_INFO+x} ]; then - VIASH_PAR_RUN_INFO="\$id.run_info.json" -fi -if [ -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then - VIASH_PAR_QUANT_RESULTS_FILE="\$id.abundance.tsv" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_INDEX" ] && [ ! -e "$VIASH_PAR_INDEX" ]; then - ViashError "Input file '$VIASH_PAR_INDEX' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then - ViashError "Input file '$VIASH_PAR_GTF' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_CHROMOSOMES" ] && [ ! -e "$VIASH_PAR_CHROMOSOMES" ]; then - ViashError "Input file '$VIASH_PAR_CHROMOSOMES' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FRAGMENT_LENGTH" ]]; then - if ! [[ "$VIASH_PAR_FRAGMENT_LENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--fragment_length' has to be an integer. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FRAGMENT_LENGTH_SD" ]]; then - if ! [[ "$VIASH_PAR_FRAGMENT_LENGTH_SD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--fragment_length_sd' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_LOG")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_LOG")" -fi -if [ ! -z "$VIASH_PAR_RUN_INFO" ] && [ ! -d "$(dirname "$VIASH_PAR_RUN_INFO")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_RUN_INFO")" -fi -if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ] && [ ! -d "$(dirname "$VIASH_PAR_QUANT_RESULTS_FILE")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUANT_RESULTS_FILE")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_INDEX" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INDEX")" ) - VIASH_PAR_INDEX=$(ViashDockerAutodetectMount "$VIASH_PAR_INDEX") -fi -if [ ! -z "$VIASH_PAR_GTF" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) - VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") -fi -if [ ! -z "$VIASH_PAR_CHROMOSOMES" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_CHROMOSOMES")" ) - VIASH_PAR_CHROMOSOMES=$(ViashDockerAutodetectMount "$VIASH_PAR_CHROMOSOMES") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_LOG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_LOG")" ) - VIASH_PAR_LOG=$(ViashDockerAutodetectMount "$VIASH_PAR_LOG") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_LOG" ) -fi -if [ ! -z "$VIASH_PAR_RUN_INFO" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_RUN_INFO")" ) - VIASH_PAR_RUN_INFO=$(ViashDockerAutodetectMount "$VIASH_PAR_RUN_INFO") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_RUN_INFO" ) -fi -if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_QUANT_RESULTS_FILE")" ) - VIASH_PAR_QUANT_RESULTS_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_QUANT_RESULTS_FILE") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_QUANT_RESULTS_FILE" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-kallisto_quant-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\"'\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) -$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) -$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_CHROMOSOMES+x} ]; then echo "${VIASH_PAR_CHROMOSOMES}" | sed "s#'#'\"'\"'#g;s#.*#par_chromosomes='&'#" ; else echo "# par_chromosomes="; fi ) -$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\"'\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi ) -$( if [ ! 
-z ${VIASH_PAR_FRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_SD}" | sed "s#'#'\"'\"'#g;s#.*#par_fragment_length_sd='&'#" ; else echo "# par_fragment_length_sd="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\"'\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) -$( if [ ! -z ${VIASH_PAR_RUN_INFO+x} ]; then echo "${VIASH_PAR_RUN_INFO}" | sed "s#'#'\"'\"'#g;s#.*#par_run_info='&'#" ; else echo "# par_run_info="; fi ) -$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_quant_results_file='&'#" ; else echo "# par_quant_results_file="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -IFS="," read -ra input <<< \$par_input - -single_end_params='' -if [ \$par_paired == "false" ]; then - if [[ \$par_fragment_length < 0 ]] || [[ ! \$fragment_length_sd < 0 ]]; then - echo "fragment_length and fragment_length_sd must be set for single-end data" - exit 1 - fi - single_end_params="--single --fragment-length \$par_fragment_length --sd \$par_fragment_length_sd" -fi - -strandedness='' -if [[ "\$par_extra_args" != *"--fr-stranded"* ]] && [[ "\$par_extra_args" != *"--rf-stranded"* ]]; then - if [ "\$par_strandedness" == 'forward' ]; then - strandedness='--fr-stranded' - elif [ "\$par_strandedness" == 'reverse' ]; then - strandedness='--rf-stranded' - fi -fi - -mkdir -p \$par_output - -kallisto quant \\ - \${meta_cpus:+--threads \$meta_cpus} \\ - --index \$par_index \\ - \${par_gtf:+--gtf \$par_gtf} \\ - \${par_chromosomes:+--chromosomes \$par_chromosomes} \\ - \$single_end_params \\ - \$strandedness \\ - \$par_extra_args \\ - -o \$par_output \\ - \${input[*]} 2> >(tee -a \${par_output}/kallisto_quant.log >&2) - -mv \${par_output}/kallisto_quant.log \${par_log} -mv \${par_output}/run_info.json \${par_run_info} -cp \${par_output}/abundance.tsv \${par_quant_results_file} -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! 
-z "$VIASH_PAR_INDEX" ]; then - VIASH_PAR_INDEX=$(ViashDockerStripAutomount "$VIASH_PAR_INDEX") - fi - if [ ! -z "$VIASH_PAR_GTF" ]; then - VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") - fi - if [ ! -z "$VIASH_PAR_CHROMOSOMES" ]; then - VIASH_PAR_CHROMOSOMES=$(ViashDockerStripAutomount "$VIASH_PAR_CHROMOSOMES") - fi - if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") - fi - if [ ! -z "$VIASH_PAR_LOG" ]; then - VIASH_PAR_LOG=$(ViashDockerStripAutomount "$VIASH_PAR_LOG") - fi - if [ ! -z "$VIASH_PAR_RUN_INFO" ]; then - VIASH_PAR_RUN_INFO=$(ViashDockerStripAutomount "$VIASH_PAR_RUN_INFO") - fi - if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ]; then - VIASH_PAR_QUANT_RESULTS_FILE=$(ViashDockerStripAutomount "$VIASH_PAR_QUANT_RESULTS_FILE") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_LOG" ] && [ ! -e "$VIASH_PAR_LOG" ]; then - ViashError "Output file '$VIASH_PAR_LOG' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_RUN_INFO" ] && [ ! -e "$VIASH_PAR_RUN_INFO" ]; then - ViashError "Output file '$VIASH_PAR_RUN_INFO' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_QUANT_RESULTS_FILE" ] && [ ! -e "$VIASH_PAR_QUANT_RESULTS_FILE" ]; then - ViashError "Output file '$VIASH_PAR_QUANT_RESULTS_FILE' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/multiqc_custom_biotype/.config.vsh.yaml b/target/executable/multiqc_custom_biotype/.config.vsh.yaml index c7d1c7e..d80b412 100644 --- a/target/executable/multiqc_custom_biotype/.config.vsh.yaml +++ b/target/executable/multiqc_custom_biotype/.config.vsh.yaml @@ -76,7 +76,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -165,8 +165,8 @@ build_info: output: "target/executable/multiqc_custom_biotype" executable: "target/executable/multiqc_custom_biotype/multiqc_custom_biotype" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -177,7 +177,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/multiqc_custom_biotype/multiqc_custom_biotype b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype index 501fa9d..a42c93b 100755 --- a/target/executable/multiqc_custom_biotype/multiqc_custom_biotype +++ b/target/executable/multiqc_custom_biotype/multiqc_custom_biotype @@ -476,9 +476,9 @@ function ViashDockerfile { FROM python:latest ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component multiqc_custom_biotype" -LABEL org.opencontainers.image.created="2024-11-27T08:42:28Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL 
org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/picard_markduplicates/.config.vsh.yaml b/target/executable/picard_markduplicates/.config.vsh.yaml index 32c6157..b319d14 100644 --- a/target/executable/picard_markduplicates/.config.vsh.yaml +++ b/target/executable/picard_markduplicates/.config.vsh.yaml @@ -110,7 +110,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -207,8 +207,8 @@ build_info: output: "target/executable/picard_markduplicates" executable: "target/executable/picard_markduplicates/picard_markduplicates" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -219,7 +219,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/picard_markduplicates/picard_markduplicates b/target/executable/picard_markduplicates/picard_markduplicates index a4d8323..5ae0608 100755 --- a/target/executable/picard_markduplicates/picard_markduplicates +++ b/target/executable/picard_markduplicates/picard_markduplicates @@ -494,9 +494,9 @@ wget --no-check-certificate https://github.com/broadinstitute/picard/releases/do mv picard.jar /usr/local/bin LABEL org.opencontainers.image.description="Companion container for 
running component picard_markduplicates" -LABEL org.opencontainers.image.created="2024-11-27T08:42:26Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:47Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/prepare_multiqc_input/.config.vsh.yaml b/target/executable/prepare_multiqc_input/.config.vsh.yaml index 68cf79a..f61a51d 100644 --- a/target/executable/prepare_multiqc_input/.config.vsh.yaml +++ b/target/executable/prepare_multiqc_input/.config.vsh.yaml @@ -320,7 +320,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -409,8 +409,8 @@ build_info: output: "target/executable/prepare_multiqc_input" executable: "target/executable/prepare_multiqc_input/prepare_multiqc_input" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -421,7 +421,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/prepare_multiqc_input/prepare_multiqc_input b/target/executable/prepare_multiqc_input/prepare_multiqc_input index a3c8956..c6b9758 100755 --- 
a/target/executable/prepare_multiqc_input/prepare_multiqc_input +++ b/target/executable/prepare_multiqc_input/prepare_multiqc_input @@ -557,9 +557,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component prepare_multiqc_input" -LABEL org.opencontainers.image.created="2024-11-27T08:42:22Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:46Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml index df1e2e5..a49940b 100644 --- a/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml +++ b/target/executable/preprocess_transcripts_fasta/.config.vsh.yaml @@ -49,7 +49,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -138,8 +138,8 @@ build_info: output: "target/executable/preprocess_transcripts_fasta" executable: "target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -150,7 
+150,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta index fc163f9..d39c310 100755 --- a/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta +++ b/target/executable/preprocess_transcripts_fasta/preprocess_transcripts_fasta @@ -462,9 +462,9 @@ function ViashDockerfile { FROM ubuntu:22.04 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component preprocess_transcripts_fasta" -LABEL org.opencontainers.image.created="2024-11-27T08:42:27Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:47Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/preseq_lcextrap/.config.vsh.yaml b/target/executable/preseq_lcextrap/.config.vsh.yaml index 46c539f..ac3c34e 100644 --- a/target/executable/preseq_lcextrap/.config.vsh.yaml +++ b/target/executable/preseq_lcextrap/.config.vsh.yaml @@ -70,7 +70,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -191,8 +191,8 @@ build_info: output: "target/executable/preseq_lcextrap" executable: "target/executable/preseq_lcextrap/preseq_lcextrap" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -203,7 +203,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/preseq_lcextrap/preseq_lcextrap b/target/executable/preseq_lcextrap/preseq_lcextrap index 0559a95..92106a8 100755 --- a/target/executable/preseq_lcextrap/preseq_lcextrap +++ b/target/executable/preseq_lcextrap/preseq_lcextrap @@ -495,9 +495,9 @@ mkdir build && cd build && \ make && make install && make HAVE_HTSLIB=1 all LABEL org.opencontainers.image.description="Companion container for running component preseq_lcextrap" -LABEL org.opencontainers.image.created="2024-11-27T08:42:26Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:47Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/qualimap/.config.vsh.yaml b/target/executable/qualimap/.config.vsh.yaml deleted file mode 100644 index 7694026..0000000 --- a/target/executable/qualimap/.config.vsh.yaml +++ /dev/null @@ -1,301 +0,0 @@ -name: "qualimap" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "path to input mapping file in BAM format." 
- info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--gtf" - description: "path to annotations file in Ensembl GTF format." - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output_dir" - description: "path to output directory for raw data and report." - info: null - default: - - "$id.qualimap_output" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_pdf" - description: "path to output file for pdf report." - info: null - default: - - "$id.report.pdf" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--output_format" - description: "Format of the output report (PDF or HTML, default is HTML)" - info: null - default: - - "html" - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Optional" - arguments: - - type: "integer" - name: "--pr_bases" - description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ - \ bias (default = 100)." - info: null - default: - - 100 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--tr_bias" - description: "Number of top highly expressed transcripts to compute 5'-3' bias\ - \ (default = 1000)." - info: null - default: - - 1000 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--algorithm" - description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)." 
- info: null - default: - - "uniquely-mapped-reads" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--sequencing_protocol" - description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\ - \ or non-strand-specific (default))." - info: null - default: - - "non-strand-specific" - required: false - choices: - - "non-strand-specific" - - "strand-specific-reverse" - - "strand-specific-forward" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean_true" - name: "--paired" - description: "Setting this flag for paired-end experiments will result in counting\ - \ fragments instead of reads." - info: null - direction: "input" - - type: "boolean_true" - name: "--sorted" - description: "Setting this flag indicates that the input file is already sorted\ - \ by name. If flag is not set, additional sorting by name will be performed.\ - \ Only requiredfor paired-end analysis." - info: null - direction: "input" - - type: "string" - name: "--java_memory_size" - description: "maximum Java heap memory size, default = 4G." 
- info: null - default: - - "4G" - required: false - direction: "input" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "RNA-seq QC analysis using the qualimap \n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam" -- type: "file" - path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai" -- type: "file" - path: "genes.gtf" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/qualimap/rnaseq/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 
8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "r-base" - - "unzip" - - "wget" - - "openjdk-8-jdk" - - "libxml2-dev" - - "libcurl4-openssl-dev" - interactive: false - - type: "docker" - run: - - "wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip &&\ - \ \\\nunzip qualimap_v2.3.zip && \\\ncp -a qualimap_v2.3/. 
usr/bin && \\\nunset\ - \ DISPLAY && \\\nmkdir -p tmp && \\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n" - - type: "r" - cran: - - "optparse" - bioc: - - "NOISeqr" - bioc_force_install: false - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/qualimap/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/qualimap" - executable: "target/executable/qualimap/qualimap" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/qualimap/qualimap b/target/executable/qualimap/qualimap deleted file mode 100755 index 8efca93..0000000 --- a/target/executable/qualimap/qualimap +++ /dev/null @@ -1,1369 +0,0 @@ -#!/usr/bin/env bash - -# qualimap main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="qualimap" -VIASH_META_FUNCTIONALITY_NAME="qualimap" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "qualimap main" - echo "" - echo "RNA-seq QC analysis using the qualimap" - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " path to input mapping file in BAM format." - echo "" - echo " --gtf" - echo " type: file, required parameter, file must exist" - echo " path to annotations file in Ensembl GTF format." - echo "" - echo "Output:" - echo " --output_dir" - echo " type: file, output, file must exist" - echo " default: \$id.qualimap_output" - echo " path to output directory for raw data and report." - echo "" - echo " --output_pdf" - echo " type: file, output" - echo " default: \$id.report.pdf" - echo " path to output file for pdf report." 
- echo "" - echo " --output_format" - echo " type: string" - echo " default: html" - echo " Format of the output report (PDF or HTML, default is HTML)" - echo "" - echo "Optional:" - echo " --pr_bases" - echo " type: integer" - echo " default: 100" - echo " min: 1" - echo " Number of upstream/downstream nucleotide bases to compute 5'-3' bias" - echo " (default = 100)." - echo "" - echo " --tr_bias" - echo " type: integer" - echo " default: 1000" - echo " min: 1" - echo " Number of top highly expressed transcripts to compute 5'-3' bias" - echo " (default = 1000)." - echo "" - echo " --algorithm" - echo " type: string" - echo " default: uniquely-mapped-reads" - echo " Counting algorithm (uniquely-mapped-reads (default) or proportional)." - echo "" - echo " --sequencing_protocol" - echo " type: string" - echo " default: non-strand-specific" - echo " choices: [ non-strand-specific, strand-specific-reverse," - echo "strand-specific-forward ]" - echo " Sequencing library protocol (strand-specific-forward," - echo " strand-specific-reverse or non-strand-specific (default))." - echo "" - echo " --paired" - echo " type: boolean_true" - echo " Setting this flag for paired-end experiments will result in counting" - echo " fragments instead of reads." - echo "" - echo " --sorted" - echo " type: boolean_true" - echo " Setting this flag indicates that the input file is already sorted by" - echo " name. If flag is not set, additional sorting by name will be performed." - echo " Only requiredfor paired-end analysis." - echo "" - echo " --java_memory_size" - echo " type: string" - echo " default: 4G" - echo " maximum Java heap memory size, default = 4G." 
-} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y r-base unzip wget openjdk-8-jdk libxml2-dev libcurl4-openssl-dev && \ - rm -rf /var/lib/apt/lists/* - -RUN wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \ -unzip qualimap_v2.3.zip && \ -cp -a 
qualimap_v2.3/. usr/bin && \ -unset DISPLAY && \ -mkdir -p tmp && \ -export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - -RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ - Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \ - Rscript -e 'if (!requireNamespace("NOISeqr", quietly = TRUE)) BiocManager::install("NOISeqr")' && \ - Rscript -e 'remotes::install_cran(c("optparse"), repos = "https://cran.rstudio.com")' - -LABEL org.opencontainers.image.description="Companion container for running component qualimap" -LABEL org.opencontainers.image.created="2024-11-27T08:42:24Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "qualimap main" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --gtf) - [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GTF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gtf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gtf=*) - [ -n "$VIASH_PAR_GTF" ] && ViashError Bad arguments for option \'--gtf=*\': \'$VIASH_PAR_GTF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GTF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_dir) - [ -n "$VIASH_PAR_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--output_dir\': \'$VIASH_PAR_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_dir. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_dir=*) - [ -n "$VIASH_PAR_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--output_dir=*\': \'$VIASH_PAR_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_pdf) - [ -n "$VIASH_PAR_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--output_pdf\': \'$VIASH_PAR_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PDF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_pdf. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_pdf=*) - [ -n "$VIASH_PAR_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--output_pdf=*\': \'$VIASH_PAR_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PDF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_format) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_format=*) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --pr_bases) - [ -n "$VIASH_PAR_PR_BASES" ] && ViashError Bad arguments for option \'--pr_bases\': \'$VIASH_PAR_PR_BASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PR_BASES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --pr_bases. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --pr_bases=*) - [ -n "$VIASH_PAR_PR_BASES" ] && ViashError Bad arguments for option \'--pr_bases=*\': \'$VIASH_PAR_PR_BASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PR_BASES=$(ViashRemoveFlags "$1") - shift 1 - ;; - --tr_bias) - [ -n "$VIASH_PAR_TR_BIAS" ] && ViashError Bad arguments for option \'--tr_bias\': \'$VIASH_PAR_TR_BIAS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TR_BIAS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --tr_bias. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --tr_bias=*) - [ -n "$VIASH_PAR_TR_BIAS" ] && ViashError Bad arguments for option \'--tr_bias=*\': \'$VIASH_PAR_TR_BIAS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TR_BIAS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --algorithm) - [ -n "$VIASH_PAR_ALGORITHM" ] && ViashError Bad arguments for option \'--algorithm\': \'$VIASH_PAR_ALGORITHM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALGORITHM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --algorithm. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --algorithm=*) - [ -n "$VIASH_PAR_ALGORITHM" ] && ViashError Bad arguments for option \'--algorithm=*\': \'$VIASH_PAR_ALGORITHM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ALGORITHM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sequencing_protocol) - [ -n "$VIASH_PAR_SEQUENCING_PROTOCOL" ] && ViashError Bad arguments for option \'--sequencing_protocol\': \'$VIASH_PAR_SEQUENCING_PROTOCOL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SEQUENCING_PROTOCOL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sequencing_protocol. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sequencing_protocol=*) - [ -n "$VIASH_PAR_SEQUENCING_PROTOCOL" ] && ViashError Bad arguments for option \'--sequencing_protocol=*\': \'$VIASH_PAR_SEQUENCING_PROTOCOL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SEQUENCING_PROTOCOL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=true - shift 1 - ;; - --sorted) - [ -n "$VIASH_PAR_SORTED" ] && ViashError Bad arguments for option \'--sorted\': \'$VIASH_PAR_SORTED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SORTED=true - shift 1 - ;; - --java_memory_size) - [ -n "$VIASH_PAR_JAVA_MEMORY_SIZE" ] && ViashError Bad arguments for option \'--java_memory_size\': \'$VIASH_PAR_JAVA_MEMORY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_JAVA_MEMORY_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --java_memory_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --java_memory_size=*) - [ -n "$VIASH_PAR_JAVA_MEMORY_SIZE" ] && ViashError Bad arguments for option \'--java_memory_size=*\': \'$VIASH_PAR_JAVA_MEMORY_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_JAVA_MEMORY_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." - exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/qualimap:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational 
defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_GTF+x} ]; then - ViashError '--gtf' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then - VIASH_PAR_OUTPUT_DIR="\$id.qualimap_output" -fi -if [ -z ${VIASH_PAR_OUTPUT_PDF+x} ]; then - VIASH_PAR_OUTPUT_PDF="\$id.report.pdf" -fi -if [ -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then - VIASH_PAR_OUTPUT_FORMAT="html" -fi -if [ -z ${VIASH_PAR_PR_BASES+x} ]; then - VIASH_PAR_PR_BASES="100" -fi -if [ -z ${VIASH_PAR_TR_BIAS+x} ]; then - VIASH_PAR_TR_BIAS="1000" -fi -if [ -z ${VIASH_PAR_ALGORITHM+x} ]; then - VIASH_PAR_ALGORITHM="uniquely-mapped-reads" -fi -if [ -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then - VIASH_PAR_SEQUENCING_PROTOCOL="non-strand-specific" -fi -if [ -z ${VIASH_PAR_PAIRED+x} ]; then - VIASH_PAR_PAIRED="false" -fi -if [ -z ${VIASH_PAR_SORTED+x} ]; then - VIASH_PAR_SORTED="false" -fi -if [ -z ${VIASH_PAR_JAVA_MEMORY_SIZE+x} ]; then - VIASH_PAR_JAVA_MEMORY_SIZE="4G" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_GTF" ] && [ ! -e "$VIASH_PAR_GTF" ]; then - ViashError "Input file '$VIASH_PAR_GTF' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PR_BASES" ]]; then - if ! [[ "$VIASH_PAR_PR_BASES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--pr_bases' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_PR_BASES -lt 1 ]]; then - ViashError '--pr_bases' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TR_BIAS" ]]; then - if ! 
[[ "$VIASH_PAR_TR_BIAS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--tr_bias' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_TR_BIAS -lt 1 ]]; then - ViashError '--tr_bias' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SORTED" ]]; then - if ! [[ "$VIASH_PAR_SORTED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--sorted' has to be a boolean_true. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_SEQUENCING_PROTOCOL" ]; then - VIASH_PAR_SEQUENCING_PROTOCOL_CHOICES=("non-strand-specific;strand-specific-reverse;strand-specific-forward") - IFS=';' - set -f - if ! 
[[ ";${VIASH_PAR_SEQUENCING_PROTOCOL_CHOICES[*]};" =~ ";$VIASH_PAR_SEQUENCING_PROTOCOL;" ]]; then - ViashError '--sequencing_protocol' specified value of \'$VIASH_PAR_SEQUENCING_PROTOCOL\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DIR")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DIR")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PDF" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PDF")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PDF")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_GTF" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_GTF")" ) - VIASH_PAR_GTF=$(ViashDockerAutodetectMount "$VIASH_PAR_GTF") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DIR")" ) - VIASH_PAR_OUTPUT_DIR=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DIR") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DIR" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PDF" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PDF")" ) - VIASH_PAR_OUTPUT_PDF=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PDF") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PDF" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-qualimap-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\"'\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\"'\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_PDF+x} ]; then echo "${VIASH_PAR_OUTPUT_PDF}" | sed "s#'#'\"'\"'#g;s#.*#par_output_pdf='&'#" ; else echo "# par_output_pdf="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi ) -$( if [ ! -z ${VIASH_PAR_PR_BASES+x} ]; then echo "${VIASH_PAR_PR_BASES}" | sed "s#'#'\"'\"'#g;s#.*#par_pr_bases='&'#" ; else echo "# par_pr_bases="; fi ) -$( if [ ! -z ${VIASH_PAR_TR_BIAS+x} ]; then echo "${VIASH_PAR_TR_BIAS}" | sed "s#'#'\"'\"'#g;s#.*#par_tr_bias='&'#" ; else echo "# par_tr_bias="; fi ) -$( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\"'\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi ) -$( if [ ! 
-z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\"'\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_SORTED+x} ]; then echo "${VIASH_PAR_SORTED}" | sed "s#'#'\"'\"'#g;s#.*#par_sorted='&'#" ; else echo "# par_sorted="; fi ) -$( if [ ! -z ${VIASH_PAR_JAVA_MEMORY_SIZE+x} ]; then echo "${VIASH_PAR_JAVA_MEMORY_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_java_memory_size='&'#" ; else echo "# par_java_memory_size="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -mkdir -p \$par_output_dir - -qualimap rnaseq \\ - --java-mem-size=\$par_java_memory_size \\ - --algorithm \$par_algorithm \\ - --num-pr-bases \$par_pr_bases \\ - --num-tr-bias \$par_tr_bias \\ - --sequencing-protocol \$par_sequencing_protocol \\ - -bam \$par_input \\ - -gtf \$par_gtf \\ - \${par_paired:+-pe} \\ - \${par_sorted:+-s} \\ - -outdir \$par_output_dir \\ - -outformat \$par_output_format -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") - fi - if [ ! -z "$VIASH_PAR_GTF" ]; then - VIASH_PAR_GTF=$(ViashDockerStripAutomount "$VIASH_PAR_GTF") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ]; then - VIASH_PAR_OUTPUT_DIR=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DIR") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_PDF" ]; then - VIASH_PAR_OUTPUT_PDF=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PDF") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ] && [ ! -e "$VIASH_PAR_OUTPUT_DIR" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_DIR' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml deleted file mode 100644 index 1659c42..0000000 --- a/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml +++ /dev/null @@ -1,329 +0,0 @@ -name: "rsem_calculate_expression" -namespace: "rsem" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "string" - name: "--id" - description: "Sample ID." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--strandedness" - description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" - info: null - required: false - choices: - - "forward" - - "reverse" - - "unstranded" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--paired" - description: "Paired-end reads or not?" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input reads for quantification." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - - type: "file" - name: "--index" - description: "RSEM index." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--extra_args" - description: "Extra rsem-calculate-expression arguments in addition to the defaults." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--counts_gene" - description: "Expression counts on gene level" - info: null - example: - - "sample.genes.results" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--counts_transcripts" - description: "Expression counts on transcript level" - info: null - example: - - "sample.isoforms.results" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--stat" - description: "RSEM statistics" - info: null - example: - - "sample.stat" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--logs" - description: "RSEM logs" - info: null - example: - - "sample.log" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_star" - description: "BAM file generated by STAR (optional)" - info: null - example: - - "sample.STAR.genome.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_genome" - description: "Genome BAM file (optional)" - info: null - example: - - "sample.genome.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_transcript" - description: "Transcript BAM file (optional)" - info: null - example: - - "sample.transcript.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Calculate expression 
with RSEM.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -- type: "file" - path: "rsem.tar.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rsem/calculateexpression/main.nf" - - "modules/nf-core/rsem/calculateexpression/meta.yml" - last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 
549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "build-essential" - - "gcc" - - "g++" - - "make" - - "wget" - - "zlib1g-dev" - - "unzip" - - "xxd" - - "perl" - - "r-base" - - "bowtie2" - - "python3-pip" - - "git" - interactive: false - - type: "docker" - run: - - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\ - \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\nunzip ${STAR_VERSION}.zip && \\\ncd STAR-${STAR_VERSION}/source &&\ - \ \\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\ncp STAR /usr/local/bin\ - \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip\ - \ && \\\nunzip v${RSEM_VERSION}.zip && \\\ncd RSEM-${RSEM_VERSION} && \\\nmake\ - \ && \\\nmake install && \\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\nrm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \\\n\ - cd && \\\napt-get clean && \\\necho 'export PATH=$PATH:/usr/local/bin' >> /etc/profile\ - \ && \\\necho 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \\\n/bin/bash\ - \ -c 
\"source /etc/profile && source ~/.bashrc && echo $PATH && which STAR\"\ - \n" - env: - - "STAR_VERSION=2.7.11b" - - "RSEM_VERSION=1.3.3" - - "TZ=Europe/Brussels" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/rsem/rsem_calculate_expression/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/rsem/rsem_calculate_expression" - executable: "target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression b/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression deleted file mode 100755 index 914f877..0000000 --- a/target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression +++ /dev/null @@ -1,1465 +0,0 @@ -#!/usr/bin/env bash - -# rsem_calculate_expression main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. 
The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' 
file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. 
-# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="rsem_calculate_expression" -VIASH_META_FUNCTIONALITY_NAME="rsem_calculate_expression" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rsem_calculate_expression main" - echo "" - echo "Calculate expression with RSEM." - echo "" - echo "Input:" - echo " --id" - echo " type: string" - echo " Sample ID." - echo "" - echo " --strandedness" - echo " type: string" - echo " choices: [ forward, reverse, unstranded ]" - echo " Sample strand-specificity. Must be one of unstranded, forward, reverse" - echo "" - echo " --paired" - echo " type: boolean" - echo " Paired-end reads or not?" - echo "" - echo " --input" - echo " type: file, multiple values allowed, file must exist" - echo " Input reads for quantification." 
- echo "" - echo " --index" - echo " type: file, file must exist" - echo " RSEM index." - echo "" - echo " --extra_args" - echo " type: string" - echo " Extra rsem-calculate-expression arguments in addition to the defaults." - echo "" - echo "Output:" - echo " --counts_gene" - echo " type: file, output, file must exist" - echo " example: sample.genes.results" - echo " Expression counts on gene level" - echo "" - echo " --counts_transcripts" - echo " type: file, output, file must exist" - echo " example: sample.isoforms.results" - echo " Expression counts on transcript level" - echo "" - echo " --stat" - echo " type: file, output, file must exist" - echo " example: sample.stat" - echo " RSEM statistics" - echo "" - echo " --logs" - echo " type: file, output, file must exist" - echo " example: sample.log" - echo " RSEM logs" - echo "" - echo " --bam_star" - echo " type: file, output, file must exist" - echo " example: sample.STAR.genome.bam" - echo " BAM file generated by STAR (optional)" - echo "" - echo " --bam_genome" - echo " type: file, output, file must exist" - echo " example: sample.genome.bam" - echo " Genome BAM file (optional)" - echo "" - echo " --bam_transcript" - echo " type: file, output, file must exist" - echo " example: sample.transcript.bam" - echo " Transcript BAM file (optional)" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." 
- exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential gcc g++ make wget zlib1g-dev unzip xxd perl r-base bowtie2 python3-pip git && \ - rm -rf /var/lib/apt/lists/* - -ENV STAR_VERSION=2.7.11b -ENV RSEM_VERSION=1.3.3 -ENV TZ=Europe/Brussels -RUN ln -snf 
/usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ -cd /tmp && \ -wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ -unzip ${STAR_VERSION}.zip && \ -cd STAR-${STAR_VERSION}/source && \ -make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ -cp STAR /usr/local/bin && \ -cd /tmp && \ -wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \ -unzip v${RSEM_VERSION}.zip && \ -cd RSEM-${RSEM_VERSION} && \ -make && \ -make install && \ -rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ -rm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \ -cd && \ -apt-get clean && \ -echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \ -echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \ -/bin/bash -c "source /etc/profile && source ~/.bashrc && echo $PATH && which STAR" - -LABEL org.opencontainers.image.description="Companion container for running component rsem rsem_calculate_expression" -LABEL org.opencontainers.image.created="2024-11-27T08:42:24Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. 
# returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo 
"${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "rsem_calculate_expression main" - exit - ;; - --id) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --id. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --id=*) - [ -n "$VIASH_PAR_ID" ] && ViashError Bad arguments for option \'--id=*\': \'$VIASH_PAR_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ID=$(ViashRemoveFlags "$1") - shift 1 - ;; - --strandedness) - [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDEDNESS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --strandedness. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --strandedness=*) - [ -n "$VIASH_PAR_STRANDEDNESS" ] && ViashError Bad arguments for option \'--strandedness=*\': \'$VIASH_PAR_STRANDEDNESS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_STRANDEDNESS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --index) - [ -n "$VIASH_PAR_INDEX" ] && ViashError Bad arguments for option \'--index\': \'$VIASH_PAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --index. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --index=*) - [ -n "$VIASH_PAR_INDEX" ] && ViashError Bad arguments for option \'--index=*\': \'$VIASH_PAR_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INDEX=$(ViashRemoveFlags "$1") - shift 1 - ;; - --extra_args) - [ -n "$VIASH_PAR_EXTRA_ARGS" ] && ViashError Bad arguments for option \'--extra_args\': \'$VIASH_PAR_EXTRA_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXTRA_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_args=*) - [ -n "$VIASH_PAR_EXTRA_ARGS" ] && ViashError Bad arguments for option \'--extra_args=*\': \'$VIASH_PAR_EXTRA_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --counts_gene) - [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COUNTS_GENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_gene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --counts_gene=*) - [ -n "$VIASH_PAR_COUNTS_GENE" ] && ViashError Bad arguments for option \'--counts_gene=*\': \'$VIASH_PAR_COUNTS_GENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COUNTS_GENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --counts_transcripts) - [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COUNTS_TRANSCRIPTS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --counts_transcripts. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --counts_transcripts=*) - [ -n "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && ViashError Bad arguments for option \'--counts_transcripts=*\': \'$VIASH_PAR_COUNTS_TRANSCRIPTS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --stat) - [ -n "$VIASH_PAR_STAT" ] && ViashError Bad arguments for option \'--stat\': \'$VIASH_PAR_STAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --stat. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --stat=*) - [ -n "$VIASH_PAR_STAT" ] && ViashError Bad arguments for option \'--stat=*\': \'$VIASH_PAR_STAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STAT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --logs) - [ -n "$VIASH_PAR_LOGS" ] && ViashError Bad arguments for option \'--logs\': \'$VIASH_PAR_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --logs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --logs=*) - [ -n "$VIASH_PAR_LOGS" ] && ViashError Bad arguments for option \'--logs=*\': \'$VIASH_PAR_LOGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOGS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bam_star) - [ -n "$VIASH_PAR_BAM_STAR" ] && ViashError Bad arguments for option \'--bam_star\': \'$VIASH_PAR_BAM_STAR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM_STAR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_star. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam_star=*) - [ -n "$VIASH_PAR_BAM_STAR" ] && ViashError Bad arguments for option \'--bam_star=*\': \'$VIASH_PAR_BAM_STAR\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BAM_STAR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bam_genome) - [ -n "$VIASH_PAR_BAM_GENOME" ] && ViashError Bad arguments for option \'--bam_genome\': \'$VIASH_PAR_BAM_GENOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM_GENOME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_genome. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam_genome=*) - [ -n "$VIASH_PAR_BAM_GENOME" ] && ViashError Bad arguments for option \'--bam_genome=*\': \'$VIASH_PAR_BAM_GENOME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM_GENOME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bam_transcript) - [ -n "$VIASH_PAR_BAM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--bam_transcript\': \'$VIASH_PAR_BAM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM_TRANSCRIPT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam_transcript. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam_transcript=*) - [ -n "$VIASH_PAR_BAM_TRANSCRIPT" ] && ViashError Bad arguments for option \'--bam_transcript=*\': \'$VIASH_PAR_BAM_TRANSCRIPT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BAM_TRANSCRIPT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." - exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rsem/rsem_calculate_expression:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# 
setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# check whether required files exist -if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi -if [ ! -z "$VIASH_PAR_INDEX" ] && [ ! -e "$VIASH_PAR_INDEX" ]; then - ViashError "Input file '$VIASH_PAR_INDEX' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_STRANDEDNESS" ]; then - VIASH_PAR_STRANDEDNESS_CHOICES=("forward;reverse;unstranded") - IFS=';' - set -f - if ! [[ ";${VIASH_PAR_STRANDEDNESS_CHOICES[*]};" =~ ";$VIASH_PAR_STRANDEDNESS;" ]]; then - ViashError '--strandedness' specified value of \'$VIASH_PAR_STRANDEDNESS\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! 
-d "$(dirname "$VIASH_PAR_COUNTS_GENE")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_COUNTS_GENE")" -fi -if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && [ ! -d "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPTS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_COUNTS_TRANSCRIPTS")" -fi -if [ ! -z "$VIASH_PAR_STAT" ] && [ ! -d "$(dirname "$VIASH_PAR_STAT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_STAT")" -fi -if [ ! -z "$VIASH_PAR_LOGS" ] && [ ! -d "$(dirname "$VIASH_PAR_LOGS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_LOGS")" -fi -if [ ! -z "$VIASH_PAR_BAM_STAR" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_STAR")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BAM_STAR")" -fi -if [ ! -z "$VIASH_PAR_BAM_GENOME" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_GENOME")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BAM_GENOME")" -fi -if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT" ] && [ ! -d "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_BAM_TRANSCRIPT")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_INDEX" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INDEX")" ) - VIASH_PAR_INDEX=$(ViashDockerAutodetectMount "$VIASH_PAR_INDEX") -fi -if [ ! 
-z "$VIASH_PAR_COUNTS_GENE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_GENE")" ) - VIASH_PAR_COUNTS_GENE=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_GENE") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_GENE" ) -fi -if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_COUNTS_TRANSCRIPTS")" ) - VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashDockerAutodetectMount "$VIASH_PAR_COUNTS_TRANSCRIPTS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_COUNTS_TRANSCRIPTS" ) -fi -if [ ! -z "$VIASH_PAR_STAT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_STAT")" ) - VIASH_PAR_STAT=$(ViashDockerAutodetectMount "$VIASH_PAR_STAT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_STAT" ) -fi -if [ ! -z "$VIASH_PAR_LOGS" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_LOGS")" ) - VIASH_PAR_LOGS=$(ViashDockerAutodetectMount "$VIASH_PAR_LOGS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_LOGS" ) -fi -if [ ! -z "$VIASH_PAR_BAM_STAR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM_STAR")" ) - VIASH_PAR_BAM_STAR=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM_STAR") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAM_STAR" ) -fi -if [ ! -z "$VIASH_PAR_BAM_GENOME" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM_GENOME")" ) - VIASH_PAR_BAM_GENOME=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM_GENOME") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAM_GENOME" ) -fi -if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM_TRANSCRIPT")" ) - VIASH_PAR_BAM_TRANSCRIPT=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM_TRANSCRIPT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_BAM_TRANSCRIPT" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rsem_calculate_expression-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\"'\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi ) -$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\"'\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\"'\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi ) -$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi ) -$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi ) -$( if [ ! 
-z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\"'\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi ) -$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\"'\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi ) -$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\"'\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\"'\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\"'\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") - -[[ "\$par_paired" == "false" ]] && unset par_paired - -if [ \$par_strandedness == 'forward' ]; then - strandedness='--strandedness forward' -elif [ \$par_strandedness == 'reverse' ]; then - strandedness='--strandedness reverse' -else - strandedness='' -fi - -IFS=";" read -ra input <<< \$par_input - -INDEX=\`find -L \$par_index/ -name "*.grp" | sed 's/\\.grp\$//'\` - -rsem-calculate-expression \\ - \${meta_cpus:+--num-threads \$meta_cpus} \\ - \$strandedness \\ - \${par_paired:+--paired-end} \\ - \$par_extra_args \\ - \${input[*]} \\ - \$INDEX \\ - \$par_id - -[[ -e "\${par_id}.genes.results" ]] && mv "\${par_id}.genes.results" \$par_counts_gene -[[ -e "\${par_id}id.isoforms.results" ]] && mv "\${par_id}id.isoforms.results" \$par_counts_transcripts -[[ -e "\${par_id}.stat" ]] && mv "\${par_id}.stat" \$par_stat -# [[ -e "\${par_id}.log" ]] && mv "\${par_id}.log" \$par_logs -[[ -e "\${par_id}.STAR.genome.bam" ]] && mv "\${par_id}.STAR.genome.bam" \$par_bam_star -[[ -e "\${par_id}.genome.bam" ]] && mv "\${par_id}.genome.bam" \$par_bam_genome -[[ -e "\${par_id}.transcript.bam" ]] && mv "\${par_id}.transcript.bam" \$par_bam_transcript -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_INDEX" ]; then - VIASH_PAR_INDEX=$(ViashDockerStripAutomount "$VIASH_PAR_INDEX") - fi - if [ ! -z "$VIASH_PAR_COUNTS_GENE" ]; then - VIASH_PAR_COUNTS_GENE=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_GENE") - fi - if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then - VIASH_PAR_COUNTS_TRANSCRIPTS=$(ViashDockerStripAutomount "$VIASH_PAR_COUNTS_TRANSCRIPTS") - fi - if [ ! -z "$VIASH_PAR_STAT" ]; then - VIASH_PAR_STAT=$(ViashDockerStripAutomount "$VIASH_PAR_STAT") - fi - if [ ! -z "$VIASH_PAR_LOGS" ]; then - VIASH_PAR_LOGS=$(ViashDockerStripAutomount "$VIASH_PAR_LOGS") - fi - if [ ! -z "$VIASH_PAR_BAM_STAR" ]; then - VIASH_PAR_BAM_STAR=$(ViashDockerStripAutomount "$VIASH_PAR_BAM_STAR") - fi - if [ ! -z "$VIASH_PAR_BAM_GENOME" ]; then - VIASH_PAR_BAM_GENOME=$(ViashDockerStripAutomount "$VIASH_PAR_BAM_GENOME") - fi - if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT" ]; then - VIASH_PAR_BAM_TRANSCRIPT=$(ViashDockerStripAutomount "$VIASH_PAR_BAM_TRANSCRIPT") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_COUNTS_GENE" ] && [ ! 
-e "$VIASH_PAR_COUNTS_GENE" ]; then - ViashError "Output file '$VIASH_PAR_COUNTS_GENE' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_COUNTS_TRANSCRIPTS" ] && [ ! -e "$VIASH_PAR_COUNTS_TRANSCRIPTS" ]; then - ViashError "Output file '$VIASH_PAR_COUNTS_TRANSCRIPTS' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_STAT" ] && [ ! -e "$VIASH_PAR_STAT" ]; then - ViashError "Output file '$VIASH_PAR_STAT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_LOGS" ] && [ ! -e "$VIASH_PAR_LOGS" ]; then - ViashError "Output file '$VIASH_PAR_LOGS' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAM_STAR" ] && [ ! -e "$VIASH_PAR_BAM_STAR" ]; then - ViashError "Output file '$VIASH_PAR_BAM_STAR' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAM_GENOME" ] && [ ! -e "$VIASH_PAR_BAM_GENOME" ]; then - ViashError "Output file '$VIASH_PAR_BAM_GENOME' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAM_TRANSCRIPT" ] && [ ! -e "$VIASH_PAR_BAM_TRANSCRIPT" ]; then - ViashError "Output file '$VIASH_PAR_BAM_TRANSCRIPT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/rsem/rsem_merge_counts/.config.vsh.yaml b/target/executable/rsem_merge_counts/.config.vsh.yaml similarity index 93% rename from target/executable/rsem/rsem_merge_counts/.config.vsh.yaml rename to target/executable/rsem_merge_counts/.config.vsh.yaml index d09129b..99e062a 100644 --- a/target/executable/rsem/rsem_merge_counts/.config.vsh.yaml +++ b/target/executable/rsem_merge_counts/.config.vsh.yaml @@ -1,5 +1,4 @@ name: "rsem_merge_counts" -namespace: "rsem" version: "main" argument_groups: - name: "Input" @@ -93,7 +92,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -176,14 +175,14 @@ engines: - type: "native" id: "native" build_info: - config: "src/rsem/rsem_merge_counts/config.vsh.yaml" + config: "src/rsem_merge_counts/config.vsh.yaml" runner: "executable" engine: "docker|native" - output: "target/executable/rsem/rsem_merge_counts" - executable: "target/executable/rsem/rsem_merge_counts/rsem_merge_counts" + output: "target/executable/rsem_merge_counts" + executable: "target/executable/rsem_merge_counts/rsem_merge_counts" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -194,7 +193,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rsem/rsem_merge_counts/rsem_merge_counts b/target/executable/rsem_merge_counts/rsem_merge_counts similarity index 99% rename from target/executable/rsem/rsem_merge_counts/rsem_merge_counts rename to 
target/executable/rsem_merge_counts/rsem_merge_counts index 0c92756..fd2adf4 100755 --- a/target/executable/rsem/rsem_merge_counts/rsem_merge_counts +++ b/target/executable/rsem_merge_counts/rsem_merge_counts @@ -482,10 +482,10 @@ function ViashDockerfile { cat << 'VIASHDOCKER' FROM ubuntu:22.04 ENTRYPOINT [] -LABEL org.opencontainers.image.description="Companion container for running component rsem rsem_merge_counts" -LABEL org.opencontainers.image.created="2024-11-27T08:42:23Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.description="Companion container for running component rsem_merge_counts" +LABEL org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER @@ -779,7 +779,7 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # determine docker image id if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rsem/rsem_merge_counts:main' + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rsem_merge_counts:main' fi # print dockerfile diff --git a/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml b/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml deleted file mode 100644 index 01d707f..0000000 --- a/target/executable/rseqc/rseqc_bamstat/.config.vsh.yaml +++ /dev/null @@ -1,193 +0,0 @@ -name: "rseqc_bamstat" -namespace: "rseqc" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "input alignment file in BAM or SAM format" - info: null - must_exist: true - create_parent: true 
- required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--map_qual" - description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ - \ reads, default=30." - info: null - default: - - 30 - required: false - min: 0 - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output" - description: "output file (txt) with mapping quality statistics" - info: null - default: - - "$id.mapping_quality.txt" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Generate statistics from a bam file.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "test.paired_end.sorted.bam" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/bamstat/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: 
"memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "python3-pip" - interactive: false - - type: "python" - user: false - packages: - - "RSeQC" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/rseqc/rseqc_bamstat/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/rseqc/rseqc_bamstat" - executable: "target/executable/rseqc/rseqc_bamstat/rseqc_bamstat" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat b/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat deleted file mode 100755 index daf5db3..0000000 --- a/target/executable/rseqc/rseqc_bamstat/rseqc_bamstat +++ /dev/null @@ -1,1118 +0,0 @@ -#!/usr/bin/env bash - -# rseqc_bamstat main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="rseqc_bamstat" -VIASH_META_FUNCTIONALITY_NAME="rseqc_bamstat" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_bamstat main" - echo "" - echo "Generate statistics from a bam file." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." 
- echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.mapping_quality.txt" - echo " output file (txt) with mapping quality statistics" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." 
- fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "RSeQC" - -LABEL org.opencontainers.image.description="Companion container for running component rseqc 
rseqc_bamstat" -LABEL org.opencontainers.image.created="2024-11-27T08:42:28Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "rseqc_bamstat main" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --map_qual) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --map_qual=*) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_bamstat:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then - VIASH_PAR_MAP_QUAL="30" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="\$id.mapping_quality.txt" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then - if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then - ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_bamstat-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -bam_stat.py \\ - --input \$par_input \\ - --mapq \$par_map_qual \\ -> \$par_output -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") - fi - if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! 
-z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml b/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml deleted file mode 100644 index a18522e..0000000 --- a/target/executable/rseqc/rseqc_inferexperiment/.config.vsh.yaml +++ /dev/null @@ -1,216 +0,0 @@ -name: "rseqc_inferexperiment" -namespace: "rseqc" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "input alignment file in BAM or SAM format" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--refgene" - description: "Reference gene model in bed format" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--sample_size" - description: "Numer of reads sampled from SAM/BAM file, default = 200000." - info: null - default: - - 200000 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--map_qual" - description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ - \ reads, default=30." 
- info: null - default: - - 30 - required: false - min: 0 - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output" - description: "output file (txt) of strandness report" - info: null - default: - - "$id.strandedness.txt" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Infer strandedness from sequencing reads\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "test.paired_end.sorted.bam" -- type: "file" - path: "test.bed12" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/inferexperiment/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - 
mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "python3-pip" - interactive: false - - type: "python" - user: false - packages: - - "RSeQC" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/rseqc/rseqc_inferexperiment/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/rseqc/rseqc_inferexperiment" - executable: "target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - 
- path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment b/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment deleted file mode 100755 index 3775154..0000000 --- a/target/executable/rseqc/rseqc_inferexperiment/rseqc_inferexperiment +++ /dev/null @@ -1,1182 +0,0 @@ -#!/usr/bin/env bash - -# rseqc_inferexperiment main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="rseqc_inferexperiment" -VIASH_META_FUNCTIONALITY_NAME="rseqc_inferexperiment" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_inferexperiment main" - echo "" - echo "Infer strandedness from sequencing reads" - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo " --sample_size" - echo " type: integer" - echo " default: 200000" - echo " min: 1" - echo " Numer of reads sampled from SAM/BAM file, default = 200000." 
- echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.strandedness.txt" - echo " output file (txt) of strandness report" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? 
# returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. 
You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "RSeQC" - -LABEL org.opencontainers.image.description="Companion container for running component rseqc 
rseqc_inferexperiment" -LABEL org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "rseqc_inferexperiment main" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --refgene) - [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFGENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --refgene=*) - [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sample_size) - [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SAMPLE_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_size=*) - [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size=*\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --map_qual) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --map_qual=*) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." - exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_inferexperiment:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting 
computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFGENE+x} ]; then - ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then - VIASH_PAR_SAMPLE_SIZE="200000" -fi -if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then - VIASH_PAR_MAP_QUAL="30" -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="\$id.strandedness.txt" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then - ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_SAMPLE_SIZE" ]]; then - if ! [[ "$VIASH_PAR_SAMPLE_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sample_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_SAMPLE_SIZE -lt 1 ]]; then - ViashError '--sample_size' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then - if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then - ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFGENE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) - VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! 
-z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_inferexperiment-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) -$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -infer_experiment.py \\ - -i \$par_input \\ - -r \$par_refgene \\ - -s \$par_sample_size \\ - -q \$par_map_qual \\ -> \$par_output -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") - fi - if [ ! -z "$VIASH_PAR_REFGENE" ]; then - VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") - fi - if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/rseqc/rseqc_innerdistance/.config.vsh.yaml b/target/executable/rseqc/rseqc_innerdistance/.config.vsh.yaml deleted file mode 100644 index 6ebfaee..0000000 --- a/target/executable/rseqc/rseqc_innerdistance/.config.vsh.yaml +++ /dev/null @@ -1,302 +0,0 @@ -name: "rseqc_innerdistance" -namespace: "rseqc" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "input alignment file in BAM or SAM format" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--refgene" - description: "Reference gene model in bed format" - info: null - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--sample_size" - description: "Numer of reads sampled from SAM/BAM file, default = 200000." - info: null - default: - - 200000 - required: false - min: 1 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--map_qual" - description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\ - \ reads, default=30." - info: null - default: - - 30 - required: false - min: 0 - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--lower_bound_size" - description: "Lower bound of inner distance (bp). This option is used for ploting\ - \ histograme, default=-250." - info: null - default: - - -250 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--upper_bound_size" - description: "Upper bound of inner distance (bp). This option is used for ploting\ - \ histograme, default=250." 
- info: null - default: - - 250 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--step_size" - description: "Step size (bp) of histograme. This option is used for plotting histogram,\ - \ default=5." - info: null - default: - - 5 - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output_stats" - description: "output file (txt) with summary statistics of inner distances of\ - \ paired reads" - info: null - default: - - "$id.inner_distance.stats" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_dist" - description: "output file (txt) with inner distances of all paired reads" - info: null - default: - - "$id.inner_distance.txt" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_freq" - description: "output file (txt) with frequencies of inner distances of all paired\ - \ reads" - info: null - default: - - "$id.inner_distance_freq.txt" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_plot" - description: "output file (pdf) with histogram plot of of inner distances of all\ - \ paired reads" - info: null - default: - - "$id.inner_distance_plot.pdf" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_plot_r" - description: "output file (R) with script of histogram plot of of inner distances\ - \ of all paired reads" - info: null - default: - - "$id.inner_distance_plot.r" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: 
"bash_script" - path: "script.sh" - is_executable: true -description: "Calculate inner distance between read pairs. \n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "test.paired_end.sorted.bam" -- type: "file" - path: "test.bed12" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rseqc/innerdistance/main.nf" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 
549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "python3-pip" - - "r-base" - interactive: false - - type: "python" - user: false - packages: - - "RSeQC" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/rseqc/rseqc_innerdistance/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/rseqc/rseqc_innerdistance" - executable: "target/executable/rseqc/rseqc_innerdistance/rseqc_innerdistance" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: 
\"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/rseqc/rseqc_innerdistance/rseqc_innerdistance b/target/executable/rseqc/rseqc_innerdistance/rseqc_innerdistance deleted file mode 100755 index 0c1cadd..0000000 --- a/target/executable/rseqc/rseqc_innerdistance/rseqc_innerdistance +++ /dev/null @@ -1,1397 +0,0 @@ -#!/usr/bin/env bash - -# rseqc_innerdistance main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 
's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. 
corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. 
-function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="rseqc_innerdistance" -VIASH_META_FUNCTIONALITY_NAME="rseqc_innerdistance" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "rseqc_innerdistance main" - echo "" - echo "Calculate inner distance between read pairs." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " input alignment file in BAM or SAM format" - echo "" - echo " --refgene" - echo " type: file, required parameter, file must exist" - echo " Reference gene model in bed format" - echo "" - echo " --sample_size" - echo " type: integer" - echo " default: 200000" - echo " min: 1" - echo " Numer of reads sampled from SAM/BAM file, default = 200000." - echo "" - echo " --map_qual" - echo " type: integer" - echo " default: 30" - echo " min: 0" - echo " Minimum mapping quality (phred scaled) to determine uniquely mapped" - echo " reads, default=30." - echo "" - echo " --lower_bound_size" - echo " type: integer" - echo " default: -250" - echo " Lower bound of inner distance (bp). This option is used for ploting" - echo " histograme, default=-250." - echo "" - echo " --upper_bound_size" - echo " type: integer" - echo " default: 250" - echo " Upper bound of inner distance (bp). This option is used for ploting" - echo " histograme, default=250." - echo "" - echo " --step_size" - echo " type: integer" - echo " default: 5" - echo " Step size (bp) of histograme. 
This option is used for plotting" - echo " histogram, default=5." - echo "" - echo "Output:" - echo " --output_stats" - echo " type: file, output" - echo " default: \$id.inner_distance.stats" - echo " output file (txt) with summary statistics of inner distances of paired" - echo " reads" - echo "" - echo " --output_dist" - echo " type: file, output" - echo " default: \$id.inner_distance.txt" - echo " output file (txt) with inner distances of all paired reads" - echo "" - echo " --output_freq" - echo " type: file, output" - echo " default: \$id.inner_distance_freq.txt" - echo " output file (txt) with frequencies of inner distances of all paired" - echo " reads" - echo "" - echo " --output_plot" - echo " type: file, output" - echo " default: \$id.inner_distance_plot.pdf" - echo " output file (pdf) with histogram plot of of inner distances of all" - echo " paired reads" - echo "" - echo " --output_plot_r" - echo " type: file, output" - echo " default: \$id.inner_distance_plot.r" - echo " output file (R) with script of histogram plot of of inner distances of" - echo " all paired reads" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. 
Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip r-base && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "RSeQC" - -LABEL org.opencontainers.image.description="Companion container for running component 
rseqc rseqc_innerdistance" -LABEL org.opencontainers.image.created="2024-11-27T08:42:27Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "rseqc_innerdistance main" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --refgene) - [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFGENE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --refgene. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --refgene=*) - [ -n "$VIASH_PAR_REFGENE" ] && ViashError Bad arguments for option \'--refgene=*\': \'$VIASH_PAR_REFGENE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_REFGENE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --sample_size) - [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_SAMPLE_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --sample_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --sample_size=*) - [ -n "$VIASH_PAR_SAMPLE_SIZE" ] && ViashError Bad arguments for option \'--sample_size=*\': \'$VIASH_PAR_SAMPLE_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SAMPLE_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --map_qual) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --map_qual. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --map_qual=*) - [ -n "$VIASH_PAR_MAP_QUAL" ] && ViashError Bad arguments for option \'--map_qual=*\': \'$VIASH_PAR_MAP_QUAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAP_QUAL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --lower_bound_size) - [ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--lower_bound_size\': \'$VIASH_PAR_LOWER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOWER_BOUND_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --lower_bound_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --lower_bound_size=*) - [ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--lower_bound_size=*\': \'$VIASH_PAR_LOWER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LOWER_BOUND_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --upper_bound_size) - [ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--upper_bound_size\': \'$VIASH_PAR_UPPER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UPPER_BOUND_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --upper_bound_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --upper_bound_size=*) - [ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ] && ViashError Bad arguments for option \'--upper_bound_size=*\': \'$VIASH_PAR_UPPER_BOUND_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UPPER_BOUND_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --step_size) - [ -n "$VIASH_PAR_STEP_SIZE" ] && ViashError Bad arguments for option \'--step_size\': \'$VIASH_PAR_STEP_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STEP_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --step_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --step_size=*) - [ -n "$VIASH_PAR_STEP_SIZE" ] && ViashError Bad arguments for option \'--step_size=*\': \'$VIASH_PAR_STEP_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STEP_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_stats) - [ -n "$VIASH_PAR_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--output_stats\': \'$VIASH_PAR_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_STATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_stats. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --output_stats=*) - [ -n "$VIASH_PAR_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--output_stats=*\': \'$VIASH_PAR_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_STATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_dist) - [ -n "$VIASH_PAR_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--output_dist\': \'$VIASH_PAR_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_dist. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_dist=*) - [ -n "$VIASH_PAR_OUTPUT_DIST" ] && ViashError Bad arguments for option \'--output_dist=*\': \'$VIASH_PAR_OUTPUT_DIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIST=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_freq) - [ -n "$VIASH_PAR_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--output_freq\': \'$VIASH_PAR_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FREQ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_freq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_freq=*) - [ -n "$VIASH_PAR_OUTPUT_FREQ" ] && ViashError Bad arguments for option \'--output_freq=*\': \'$VIASH_PAR_OUTPUT_FREQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FREQ=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_plot) - [ -n "$VIASH_PAR_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--output_plot\': \'$VIASH_PAR_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PLOT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_plot. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_plot=*) - [ -n "$VIASH_PAR_OUTPUT_PLOT" ] && ViashError Bad arguments for option \'--output_plot=*\': \'$VIASH_PAR_OUTPUT_PLOT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PLOT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_plot_r) - [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PLOT_R="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_plot_r. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_plot_r=*) - [ -n "$VIASH_PAR_OUTPUT_PLOT_R" ] && ViashError Bad arguments for option \'--output_plot_r=*\': \'$VIASH_PAR_OUTPUT_PLOT_R\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_PLOT_R=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/rseqc/rseqc_innerdistance:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - 
tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_REFGENE+x} ]; then - ViashError '--refgene' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then - VIASH_PAR_SAMPLE_SIZE="200000" -fi -if [ -z ${VIASH_PAR_MAP_QUAL+x} ]; then - VIASH_PAR_MAP_QUAL="30" -fi -if [ -z ${VIASH_PAR_LOWER_BOUND_SIZE+x} ]; then - VIASH_PAR_LOWER_BOUND_SIZE="-250" -fi -if [ -z ${VIASH_PAR_UPPER_BOUND_SIZE+x} ]; then - VIASH_PAR_UPPER_BOUND_SIZE="250" -fi -if [ -z ${VIASH_PAR_STEP_SIZE+x} ]; then - VIASH_PAR_STEP_SIZE="5" -fi -if [ -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then - VIASH_PAR_OUTPUT_STATS="\$id.inner_distance.stats" -fi -if [ -z ${VIASH_PAR_OUTPUT_DIST+x} ]; then - VIASH_PAR_OUTPUT_DIST="\$id.inner_distance.txt" -fi -if [ -z ${VIASH_PAR_OUTPUT_FREQ+x} ]; then - VIASH_PAR_OUTPUT_FREQ="\$id.inner_distance_freq.txt" -fi -if [ -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then - VIASH_PAR_OUTPUT_PLOT="\$id.inner_distance_plot.pdf" -fi -if [ -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then - VIASH_PAR_OUTPUT_PLOT_R="\$id.inner_distance_plot.r" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_REFGENE" ] && [ ! -e "$VIASH_PAR_REFGENE" ]; then - ViashError "Input file '$VIASH_PAR_REFGENE' does not exist." 
- exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_SAMPLE_SIZE" ]]; then - if ! [[ "$VIASH_PAR_SAMPLE_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--sample_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_SAMPLE_SIZE -lt 1 ]]; then - ViashError '--sample_size' has be more than or equal to 1. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAP_QUAL" ]]; then - if ! [[ "$VIASH_PAR_MAP_QUAL" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--map_qual' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi - if [[ $VIASH_PAR_MAP_QUAL -lt 0 ]]; then - ViashError '--map_qual' has be more than or equal to 0. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LOWER_BOUND_SIZE" ]]; then - if ! [[ "$VIASH_PAR_LOWER_BOUND_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--lower_bound_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_UPPER_BOUND_SIZE" ]]; then - if ! [[ "$VIASH_PAR_UPPER_BOUND_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--upper_bound_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_STEP_SIZE" ]]; then - if ! [[ "$VIASH_PAR_STEP_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--step_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_STATS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_STATS")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_DIST" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DIST")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DIST")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_FREQ" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FREQ")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FREQ")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PLOT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PLOT")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_PLOT_R")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_REFGENE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_REFGENE")" ) - VIASH_PAR_REFGENE=$(ViashDockerAutodetectMount "$VIASH_PAR_REFGENE") -fi -if [ ! 
-z "$VIASH_PAR_OUTPUT_STATS" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_STATS")" ) - VIASH_PAR_OUTPUT_STATS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_STATS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_STATS" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_DIST" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DIST")" ) - VIASH_PAR_OUTPUT_DIST=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DIST") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DIST" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_FREQ" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_FREQ")" ) - VIASH_PAR_OUTPUT_FREQ=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_FREQ") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_FREQ" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PLOT")" ) - VIASH_PAR_OUTPUT_PLOT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PLOT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PLOT" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_PLOT_R")" ) - VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_PLOT_R") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_PLOT_R" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-rseqc_innerdistance-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_REFGENE+x} ]; then echo "${VIASH_PAR_REFGENE}" | sed "s#'#'\"'\"'#g;s#.*#par_refgene='&'#" ; else echo "# par_refgene="; fi ) -$( if [ ! -z ${VIASH_PAR_SAMPLE_SIZE+x} ]; then echo "${VIASH_PAR_SAMPLE_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_sample_size='&'#" ; else echo "# par_sample_size="; fi ) -$( if [ ! -z ${VIASH_PAR_MAP_QUAL+x} ]; then echo "${VIASH_PAR_MAP_QUAL}" | sed "s#'#'\"'\"'#g;s#.*#par_map_qual='&'#" ; else echo "# par_map_qual="; fi ) -$( if [ ! -z ${VIASH_PAR_LOWER_BOUND_SIZE+x} ]; then echo "${VIASH_PAR_LOWER_BOUND_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_lower_bound_size='&'#" ; else echo "# par_lower_bound_size="; fi ) -$( if [ ! -z ${VIASH_PAR_UPPER_BOUND_SIZE+x} ]; then echo "${VIASH_PAR_UPPER_BOUND_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_upper_bound_size='&'#" ; else echo "# par_upper_bound_size="; fi ) -$( if [ ! -z ${VIASH_PAR_STEP_SIZE+x} ]; then echo "${VIASH_PAR_STEP_SIZE}" | sed "s#'#'\"'\"'#g;s#.*#par_step_size='&'#" ; else echo "# par_step_size="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\"'\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_DIST+x} ]; then echo "${VIASH_PAR_OUTPUT_DIST}" | sed "s#'#'\"'\"'#g;s#.*#par_output_dist='&'#" ; else echo "# par_output_dist="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_FREQ+x} ]; then echo "${VIASH_PAR_OUTPUT_FREQ}" | sed "s#'#'\"'\"'#g;s#.*#par_output_freq='&'#" ; else echo "# par_output_freq="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT}" | sed "s#'#'\"'\"'#g;s#.*#par_output_plot='&'#" ; else echo "# par_output_plot="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_PLOT_R+x} ]; then echo "${VIASH_PAR_OUTPUT_PLOT_R}" | sed "s#'#'\"'\"'#g;s#.*#par_output_plot_r='&'#" ; else echo "# par_output_plot_r="; fi ) -$( if [ ! 
-z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -exo pipefail - -prefix=\$(openssl rand -hex 8) - -inner_distance.py \\ - -i \$par_input \\ - -r \$par_refgene \\ - -o \$prefix \\ - -k \$par_sample_size \\ - -l \$par_lower_bound_size \\ - -u \$par_upper_bound_size \\ - -s \$par_step_size \\ - -q \$par_map_qual \\ -> stdout.txt - -head -n 2 stdout.txt > \$par_output_stats - -[[ -f "\$prefix.inner_distance.txt" ]] && mv \$prefix.inner_distance.txt \$par_output_dist -[[ -f "\$prefix.inner_distance_plot.pdf" ]] && mv \$prefix.inner_distance_plot.pdf \$par_output_plot -[[ -f "\$prefix.inner_distance_plot.r" ]] && mv \$prefix.inner_distance_plot.r \$par_output_plot_r -[[ -f "\$prefix.inner_distance_freq.txt" ]] && mv \$prefix.inner_distance_freq.txt \$par_output_freq -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! 
-z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") - fi - if [ ! -z "$VIASH_PAR_REFGENE" ]; then - VIASH_PAR_REFGENE=$(ViashDockerStripAutomount "$VIASH_PAR_REFGENE") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ]; then - VIASH_PAR_OUTPUT_STATS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_STATS") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_DIST" ]; then - VIASH_PAR_OUTPUT_DIST=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DIST") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_FREQ" ]; then - VIASH_PAR_OUTPUT_FREQ=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_FREQ") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_PLOT" ]; then - VIASH_PAR_OUTPUT_PLOT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PLOT") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_PLOT_R" ]; then - VIASH_PAR_OUTPUT_PLOT_R=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_PLOT_R") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -exit 0 diff --git a/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml index 5f17d79..f6cd9f2 100644 --- a/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -160,7 +160,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -260,8 +260,8 @@ build_info: output: "target/executable/rseqc/rseqc_junctionannotation" executable: "target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -272,7 +272,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation index c8b3770..db27d98 100755 --- a/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation +++ b/target/executable/rseqc/rseqc_junctionannotation/rseqc_junctionannotation @@ -519,9 +519,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionannotation" -LABEL org.opencontainers.image.created="2024-11-27T08:42:28Z" -LABEL 
org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:48Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml index c064304..ecb6bcc 100644 --- a/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -149,7 +149,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -249,8 +249,8 @@ build_info: output: "target/executable/rseqc/rseqc_junctionsaturation" executable: "target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -261,7 +261,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation index 5191548..667c23a 100755 --- 
a/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation +++ b/target/executable/rseqc/rseqc_junctionsaturation/rseqc_junctionsaturation @@ -522,9 +522,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_junctionsaturation" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml index ff46cb1..2ee314e 100644 --- a/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -63,7 +63,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -162,8 +162,8 @@ build_info: output: "target/executable/rseqc/rseqc_readdistribution" executable: "target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" 
package_config: name: "rnaseq" version: "main" @@ -174,7 +174,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution index d09055b..65f1367 100755 --- a/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution +++ b/target/executable/rseqc/rseqc_readdistribution/rseqc_readdistribution @@ -474,9 +474,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_readdistribution" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:49Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml index efd3d23..fdbd065 100644 --- a/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml +++ b/target/executable/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -111,7 +111,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -211,8 +211,8 @@ build_info: output: "target/executable/rseqc/rseqc_readduplication" executable: 
"target/executable/rseqc/rseqc_readduplication/rseqc_readduplication" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -223,7 +223,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication index bcc3ca6..684c4c5 100755 --- a/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication +++ b/target/executable/rseqc/rseqc_readduplication/rseqc_readduplication @@ -499,9 +499,9 @@ RUN pip install --upgrade pip && \ pip install --upgrade --no-cache-dir "RSeQC" LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_readduplication" -LABEL org.opencontainers.image.created="2024-11-27T08:42:27Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:48Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/rseqc/rseqc_tin/.config.vsh.yaml b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml index b0ada7c..f1b785f 100644 --- a/target/executable/rseqc/rseqc_tin/.config.vsh.yaml +++ 
b/target/executable/rseqc/rseqc_tin/.config.vsh.yaml @@ -117,7 +117,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -214,8 +214,8 @@ build_info: output: "target/executable/rseqc/rseqc_tin" executable: "target/executable/rseqc/rseqc_tin/rseqc_tin" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -226,7 +226,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/rseqc/rseqc_tin/rseqc_tin b/target/executable/rseqc/rseqc_tin/rseqc_tin index 8df328f..bd7ceea 100755 --- a/target/executable/rseqc/rseqc_tin/rseqc_tin +++ b/target/executable/rseqc/rseqc_tin/rseqc_tin @@ -501,9 +501,9 @@ RUN apt-get update && \ RUN pip3 install RSeQC LABEL org.opencontainers.image.description="Companion container for running component rseqc rseqc_tin" -LABEL org.opencontainers.image.created="2024-11-27T08:42:29Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:48Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/sortmerna/.config.vsh.yaml 
b/target/executable/sortmerna/.config.vsh.yaml index db3cad2..6b2530c 100644 --- a/target/executable/sortmerna/.config.vsh.yaml +++ b/target/executable/sortmerna/.config.vsh.yaml @@ -103,7 +103,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -192,8 +192,8 @@ build_info: output: "target/executable/sortmerna" executable: "target/executable/sortmerna/sortmerna" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -204,7 +204,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/sortmerna/sortmerna b/target/executable/sortmerna/sortmerna index d37d18e..169a650 100755 --- a/target/executable/sortmerna/sortmerna +++ b/target/executable/sortmerna/sortmerna @@ -486,9 +486,9 @@ function ViashDockerfile { FROM quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0 ENTRYPOINT [] LABEL org.opencontainers.image.description="Companion container for running component sortmerna" -LABEL org.opencontainers.image.created="2024-11-27T08:42:32Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:52Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL 
org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/stringtie/.config.vsh.yaml b/target/executable/stringtie/.config.vsh.yaml index 99e1579..1813449 100644 --- a/target/executable/stringtie/.config.vsh.yaml +++ b/target/executable/stringtie/.config.vsh.yaml @@ -120,7 +120,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -216,8 +216,8 @@ build_info: output: "target/executable/stringtie" executable: "target/executable/stringtie/stringtie" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -228,7 +228,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/stringtie/stringtie b/target/executable/stringtie/stringtie index 6731f1d..855a0aa 100755 --- a/target/executable/stringtie/stringtie +++ b/target/executable/stringtie/stringtie @@ -496,9 +496,9 @@ tar -xzf stringtie-2.2.1.Linux_x86_64.tar.gz && \ cp stringtie-2.2.1.Linux_x86_64/stringtie /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component stringtie" -LABEL org.opencontainers.image.created="2024-11-27T08:42:33Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:52Z" +LABEL 
org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/summarizedexperiment/.config.vsh.yaml b/target/executable/summarizedexperiment/.config.vsh.yaml index 4250ef7..5bd5d6b 100644 --- a/target/executable/summarizedexperiment/.config.vsh.yaml +++ b/target/executable/summarizedexperiment/.config.vsh.yaml @@ -99,7 +99,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -199,8 +199,8 @@ build_info: output: "target/executable/summarizedexperiment" executable: "target/executable/summarizedexperiment/summarizedexperiment" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -211,7 +211,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/summarizedexperiment/summarizedexperiment b/target/executable/summarizedexperiment/summarizedexperiment index ad433b7..af420dd 100755 --- a/target/executable/summarizedexperiment/summarizedexperiment +++ b/target/executable/summarizedexperiment/summarizedexperiment @@ -487,9 +487,9 @@ RUN Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.pa Rscript -e 'if (!requireNamespace("tximeta", quietly = TRUE)) BiocManager::install("tximeta")' LABEL org.opencontainers.image.description="Companion container for running component 
summarizedexperiment" -LABEL org.opencontainers.image.created="2024-11-27T08:42:31Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:52Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/trimgalore/.config.vsh.yaml b/target/executable/trimgalore/.config.vsh.yaml deleted file mode 100644 index 1dab1c8..0000000 --- a/target/executable/trimgalore/.config.vsh.yaml +++ /dev/null @@ -1,818 +0,0 @@ -name: "trimgalore" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Input files. Note that paired-end files need to be supplied in a\ - \ pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz" - info: null - example: - - "sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: ";" -- name: "Trimming options" - arguments: - - type: "integer" - name: "--quality" - alternatives: - - "-q" - description: "Trim low-quality ends (below the specified Phred score) from reads\ - \ in addition to adapter removal. For RRBS samples, quality trimming will be\ - \ performed first, and adapter trimming is carried in a second round. Other\ - \ files are quality and adapter trimmed in a single pass. The algorithm is the\ - \ same as the one used by BWA (Subtract INT from all qualities; compute partial\ - \ sums from all indices to the end of the sequence; cut sequence at the index\ - \ at which the sum is minimal)." 
- info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--phred33" - description: "Instructs Cutadapt to use ASCII+33 quality scores as Phred scores\ - \ (Sanger/Illumina 1.9+ encoding) for quality trimming." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--phred64" - description: "Instructs Cutadapt to use ASCII+64 quality scores as Phred scores\ - \ (Illumina 1.5 encoding) for quality trimming." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--fastqc" - description: "Run FastQC in the default mode on the FastQ file once trimming is\ - \ complete." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--fastqc_args" - description: "Passes extra arguments to FastQC. If more than one argument is to\ - \ be passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra\ - \ arguments will automatically invoke FastQC, so --fastqc does not have to be\ - \ specified separately." - info: null - example: - - "--nogroup --outdir /home/" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--adapter" - alternatives: - - "-a" - description: "Adapter sequence to be trimmed. If not specified explicitly, Trim\ - \ Galore will try to auto-detect whether the Illumina universal, Nextera transposase\ - \ or Illumina small RNA adapter sequence was used. A single base may also be\ - \ given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. 
\nAt a special request,\ - \ multiple adapters can also be specified like so: \n -a \" AGCTCCCG -a TTTCATTATAT\ - \ -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT\", \nor\ - \ so:\n -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"\ - \nPotentially in conjucntion with the parameter \"-n 3\" to trim all adapters.\ - \ \n example: 20\n" - info: null - example: - - "AGCTCCCG" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--adapter2" - alternatives: - - "-a2" - description: "Optional adapter sequence to be trimmed off read 2 of paired-end\ - \ files. This option requires '--paired' to be specified as well. If the libraries\ - \ to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5'\ - \ adapter automatically (GATCGTCGGACT). A single base may also be given as e.g.\ - \ -a2 A{10}, to be expanded to -a2 AAAAAAAAAA." - info: null - example: - - "AGCTCCCG" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--illumina" - description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ - \ universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of\ - \ adapter sequence." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--stranded_illumina" - description: "Adapter sequence to be trimmed is the first 13bp of the Illumina\ - \ stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default\ - \ auto-detection of adapter sequence." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--nextera" - description: "Adapter sequence to be trimmed is the first 12bp of the Nextera\ - \ adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--small_rna" - description: "Adapter sequence to be trimmed is the first 12bp of the Illumina\ - \ Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection\ - \ of adapter sequence. Selecting to trim smallRNA adapters will also lower the\ - \ --length value to 18bp. If the smallRNA libraries are paired-end then a automatically\ - \ (GATCGTCGGACT) unless -a 2 had been defined explicitly." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--consider_already_trimmed" - description: "During adapter auto-detection, the limit set by this argument allows\ - \ the user to set a threshold up to which the file is considered already adapter-trimmed.\ - \ If no adapter sequence exceeds this threshold, no additional adapter trimming\ - \ will be performed (technically, the adapter is set to '-a X'). Quality trimming\ - \ is still performed as usual." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--max_length" - description: "Discard reads that are longer than the specified value after trimming.\ - \ This is only advised for smallRNA sequencing to remove non-small RNA sequences." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--stringency" - description: "Overlap with adapter sequence required to trim a sequence. Defaults\ - \ to a very stringent setting of 1, i.e. even a single bp of overlapping sequence\ - \ will be trimmed off from the 3' end of any read." - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "double" - name: "--error_rate" - alternatives: - - "-e" - description: "Maximum allowed error rate (no. 
of errors divided by the length\ - \ of the matching region)" - info: null - example: - - 0.1 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--gzip" - description: "Compress the output file with GZIP. If the input files are GZIP-compressed\ - \ the output files will automatically be GZIP compressed as well. As of v0.2.8\ - \ the compression will take place on the fly." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--dont_gzip" - description: "Output files won't be compressed with GZIP. This option overrides\ - \ --gzip." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--length" - description: "Discard reads that became shorter than the specified length because\ - \ of either quality or adapter trimming. A value of '0' effectively disables\ - \ this behaviour. For paired-end files, both reads of a read-pair need to be\ - \ longer than the specified length to be printed out to validated paired-end\ - \ files. If only one read became too short there is the possibility of keeping\ - \ such unpaired single-end reads using the --retain_unpaired option." - info: null - example: - - 20 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--max_n" - description: "The total number of Ns a read may contain before it will be removed\ - \ altogether.In a paired-end setting, either read exceeding this limit will\ - \ result in the entire pair being removed from the trimmed output files. If\ - \ COUNT is a number between 0 and 1, it is interpreted as a fraction of the\ - \ read length." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--trim_n" - description: "Removes Ns from either side of the read. This option does currently\ - \ not work in RRBS mode." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--no_report_file" - description: "If specified no report file will be generated." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--suppress_warn" - description: "If specified any output to STDOUT or STDERR will be suppressed." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--clip_R1" - description: "Instructs TrimGalore to remove given number of bp from the 5' end\ - \ of read 1 (or single-end reads). This may be useful if the qualities were\ - \ very poor, or if there is some sort of unwanted bias at the 5' end." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--clip_R2" - description: "Instructs TrimGalore to remove given number bp from the 5' end of\ - \ read 2 (paired-end reads only). This may be useful if the qualities were very\ - \ poor, or if there is some sort of unwanted bias at the 5' end. For paired-end\ - \ BS-Seq, it is recommended to remove the first few bp because the end-repair\ - \ reaction may introduce a bias towards low methylation." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--three_prime_clip_R1" - description: "Instructs Trim Galore to remove spacified number of bp from the\ - \ 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has\ - \ been performed. This may remove some bias from the 3' end that is not directly\ - \ related to adapter sequence or basecall quality." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--three_prime_clip_R2" - description: "Instructs Trim Galore to remove bp from the 3' end of read\ - \ 2 AFTER adapter/quality trimming has been performed. This may remove some\ - \ unwanted bias from the 3' end that is not directly related to adapter sequence\ - \ or basecall quality." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--nextseq" - description: "This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt,\ - \ which will set a quality cutoff (that is normally given with -q instead),\ - \ but qualities of G bases are ignored. This trimming is in common for the NextSeq-\ - \ and NovaSeq-platforms, where basecalls without any signal are called as high-quality\ - \ G bases. This is mutually exlusive with '-q INT'." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--basename" - description: "Use specified name (PREFERRED_NAME) as the basename for output files,\ - \ instead of deriving the filenames from the input files. Single-end data would\ - \ be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz)\ - \ and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works\ - \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ - \ longer lists." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--cores" - alternatives: - - "-j" - description: "Number of cores to be used for trimming" - info: null - example: - - 1 - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Specific trimming options without adapter/quality trimming" - arguments: - - type: "integer" - name: "--hardtrim5" - description: "Instead of performing adapter-/quality trimming, this option will\ - \ simply hard-trim sequences to bp at the 5'-end. Once hard-trimming of\ - \ files is complete, Trim Galore will exit. Hard-trimmed output files will end\ - \ in ._5prime.fq(.gz)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--hardtrim3" - description: "Instead of performing adapter-/quality trimming, this option will\ - \ simply hard-trim sequences to bp at the 3'-end. Once hard-trimming of\ - \ files is complete, Trim Galore will exit. Hard-trimmed output files will end\ - \ in ._3prime.fq(.gz)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--clock" - description: "In this mode, reads are trimmed in a specific way that is currently\ - \ used for the Mouse Epigenetic Clock." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--polyA" - description: "This is a new, still experimental, trimming mode to identify and\ - \ remove poly-A tails from sequences. When --polyA is selected, Trim Galore\ - \ attempts to identify from the first supplied sample whether sequences contain\ - \ more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines\ - \ if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA\ - \ or PolyT. 
In case of paired-end sequencing, Read2 is trimmed for the complementary\ - \ base from the start of the reads. The auto-detection uses a default of A{20}\ - \ for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These\ - \ values may be changed manually using the options -a and -a2. In addition to\ - \ trimming the sequences, white spaces are replaced with _ and it records in\ - \ the read ID how many bases were trimmed so it can later be used to identify\ - \ PolyA trimmed sequences. This is currently done by writing tags to both the\ - \ start (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming\ - \ mode expects that sequences were both adapter and quality before looking\ - \ for Poly-A tails, and it is the user's responsibility to carry out an initial\ - \ round of trimming." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--implicon" - description: "This is a special mode of operation for paired-end data, such as\ - \ required for the IMPLICON method, where a UMI sequence is getting transferred\ - \ from the start of Read 2 to the readID of both reads. Following this, Trim\ - \ Galore will exit. In it's current implementation, the UMI carrying reads come\ - \ in the following format\n Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\ - \ 3'\n Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'\nWhere UUUUUUUU is\ - \ a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual\ - \ fragment to be sequenced. The UMI of Read 2 (R2) is written into the read\ - \ ID of both reads and removed from the actual sequence.\n" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "RRBS-specific options" - arguments: - - type: "boolean" - name: "--rrbs" - description: "Specifies that the input file was an MspI digested RRBS sample (recognition\ - \ site is CCGG). 
Single-end or Read 1 sequences (paired-end) which were adapter-trimmed\ - \ will have a further 2 bp removed from their 3' end. Sequences which were merely\ - \ trimmed because of poor quality will not be shortened further. Read 2 of paired-end\ - \ libraries will in addition have the first 2 bp removed from the 5' end (by\ - \ setting '--clip_r2 2'). This is to avoid using artificial methylation calls\ - \ from the filled-in cytosine positions close to the 3' MspI site in sequenced\ - \ fragments. This option is not recommended for users of the Tecan Ovation RRBS\ - \ Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--non_directional" - description: "Selecting this option for non-directional RRBS libraries will screen\ - \ quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and,\ - \ if found, removes the first two basepairs. Like with the option '--rrbs' this\ - \ avoids using cytosine positions that were filled-in during the end-repair\ - \ step. '--non_directional' requires '--rrbs' to be specified as well. Note\ - \ that this option does not set '--clip_r2 2' in paired-end mode." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--keep" - description: "Keep the quality trimmed intermediate file." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Paired-end specific options" - arguments: - - type: "boolean" - name: "--paired" - description: "This option performs length trimming of quality/adapter/RRBS trimmed\ - \ reads for paired-end files. To pass the validation test, both sequences of\ - \ a sequence pair are required to have a certain minimum length which is governed\ - \ by the option --length (see above). 
If only one read passes this length threshold\ - \ the other read can be rescued (see option --retain_unpaired). Using this option\ - \ lets you discard too short read pairs without disturbing the sequence-by-sequence\ - \ order of FastQ files which is required by many aligners. Trim Galore expects\ - \ paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq\ - \ SRR2_1.fq.gz SRR2_2.fq.gz ... ." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--retain_unpaired" - description: "If only one of the two paired-end reads became too short, the longer\ - \ read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output\ - \ files. The length cutoff for unpaired single-end reads is governed by the\ - \ parameters -r1/--length_1 and -r2/--length_2." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--length_1" - alternatives: - - "-r1" - description: "Unpaired single-end read length cutoff needed for read 1 to be written\ - \ to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode." - info: null - example: - - 35 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--length_2" - alternatives: - - "-r2" - description: "Unpaired single-end read length cutoff needed for read 2 to be written\ - \ to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode." - info: null - example: - - 35 - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output_dir" - alternatives: - - "-o" - description: "If specified all output will be written to this directory instead\ - \ of the current directory." 
- info: null - default: - - "trimmed_output" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_r1" - description: "Output file for read 1. Only works when 1 file (single-end) or 2\ - \ files (paired-end) are specified, but not for longer lists." - info: null - example: - - "read_1.fastq.gz" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_r2" - description: "Output file for read 2. Only works when 1 file (single-end) or 2\ - \ files (paired-end) are specified, but not for longer lists." - info: null - example: - - "read_2.fastq.gz" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimming_report_r1" - description: "Trimming report for read 1. Only works when 1 file (single-end)\ - \ or 2 files (paired-end) are specified, but not for longer lists." - info: null - example: - - "read_1.trimming_report.txt" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimming_report_r2" - description: "Trimming report for read 1. Only works when 1 file (single-end)\ - \ or 2 files (paired-end) are specified, but not for longer lists." - info: null - example: - - "read_2.trimming_report.txt" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_fastqc_html_1" - description: "FastQC report for trimmed (single-end) reads (or read 1 for paired-end).\ - \ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\ - \ but not for longer lists." 
- info: null - example: - - "read_1.fastqc.html" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_fastqc_html_2" - description: "FastQC report for trimmed reads (read2 for paired-end). Only works\ - \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ - \ longer lists." - info: null - example: - - "read_2.fastqc.html" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_fastqc_zip_1" - description: "FastQC results for trimmed (single-end) reads (or read 1 for paired-end).\ - \ Only works when 1 file (single-end) or 2 files (paired-end) are specified,\ - \ but not for longer lists." - info: null - example: - - "read_1.fastqc.zip" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--trimmed_fastqc_zip_2" - description: "FastQC results for trimmed reads (read2 for paired-end). Only works\ - \ when 1 file (single-end) or 2 files (paired-end) are specified, but not for\ - \ longer lists." - info: null - example: - - "read_2.fastqc.zip" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--unpaired_r1" - description: "Output file for unpired read 1. Only works when 1 file (single-end)\ - \ or 2 files (paired-end) are specified, but not for longer lists." - info: null - example: - - "unpaired_read_1.fastq" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--unpaired_r2" - description: "Output file for unpaired read 2. Only works when 1 file (single-end)\ - \ or 2 files (paired-end) are specified, but not for longer lists." 
- info: null - example: - - "unpaired_read_2.fastq" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "A wrapper tool around Cutadapt and FastQC to consistently apply quality\ - \ and adapter trimming to FastQ files. \n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -info: null -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -keywords: -- "trimming" -- "adapters" -license: "GPL-3.0" -links: - repository: "https://github.com/FelixKrueger/TrimGalore" - homepage: "https://github.com/FelixKrueger/TrimGalore" - documentation: "https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - 
mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "echo \"TrimGalore: `trim_galore --version | sed -n 's/.*version\\s\\+\\([0-9]\\\ - +\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p'`\" > /var/software_versions.txt\n" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/trimgalore/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/trimgalore" - executable: "target/executable/trimgalore/trimgalore" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: 
"vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/trimgalore/trimgalore b/target/executable/trimgalore/trimgalore deleted file mode 100755 index 971832e..0000000 --- a/target/executable/trimgalore/trimgalore +++ /dev/null @@ -1,2640 +0,0 @@ -#!/usr/bin/env bash - -# trimgalore main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="trimgalore" -VIASH_META_FUNCTIONALITY_NAME="trimgalore" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "trimgalore main" - echo "" - echo "A wrapper tool around Cutadapt and FastQC to consistently apply quality and" - echo "adapter trimming to FastQ files." - echo "" - echo "Input:" - echo " --input" - echo " type: file, required parameter, multiple values allowed, file must exist" - echo " example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq" - echo " Input files. Note that paired-end files need to be supplied in a" - echo " pairwise fashion, e.g. 
file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz" - echo "" - echo "Trimming options:" - echo " -q, --quality" - echo " type: integer" - echo " example: 20" - echo " Trim low-quality ends (below the specified Phred score) from reads in" - echo " addition to adapter removal. For RRBS samples, quality trimming will be" - echo " performed first, and adapter trimming is carried in a second round." - echo " Other files are quality and adapter trimmed in a single pass. The" - echo " algorithm is the same as the one used by BWA (Subtract INT from all" - echo " qualities; compute partial sums from all indices to the end of the" - echo " sequence; cut sequence at the index at which the sum is minimal)." - echo "" - echo " --phred33" - echo " type: boolean" - echo " Instructs Cutadapt to use ASCII+33 quality scores as Phred scores" - echo " (Sanger/Illumina 1.9+ encoding) for quality trimming." - echo "" - echo " --phred64" - echo " type: boolean" - echo " Instructs Cutadapt to use ASCII+64 quality scores as Phred scores" - echo " (Illumina 1.5 encoding) for quality trimming." - echo "" - echo " --fastqc" - echo " type: boolean" - echo " Run FastQC in the default mode on the FastQ file once trimming is" - echo " complete." - echo "" - echo " --fastqc_args" - echo " type: string" - echo " example: --nogroup --outdir /home/" - echo " Passes extra arguments to FastQC. If more than one argument is to be" - echo " passed to FastQC they must be in the form \"arg1 arg2 ...\". Passing extra" - echo " arguments will automatically invoke FastQC, so --fastqc does not have to" - echo " be specified separately." - echo "" - echo " -a, --adapter" - echo " type: string" - echo " example: AGCTCCCG" - echo " Adapter sequence to be trimmed. If not specified explicitly, Trim Galore" - echo " will try to auto-detect whether the Illumina universal, Nextera" - echo " transposase or Illumina small RNA adapter sequence was used. A single" - echo " base may also be given as e.g. 
-a A{10}, to be expanded to -a" - echo " AAAAAAAAAA." - echo " At a special request, multiple adapters can also be specified like so:" - echo " -a \" AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT\" -a2 \" AGCTAGCG -a" - echo " TCTCTTATAT -a TTTCGGATTTAT\"," - echo " or so:" - echo " -a \"file:../multiple_adapters.fa\" -a2 \"file:../different_adapters.fa\"" - echo " Potentially in conjucntion with the parameter \"-n 3\" to trim all" - echo " adapters." - echo " example: 20" - echo "" - echo " -a2, --adapter2" - echo " type: string" - echo " example: AGCTCCCG" - echo " Optional adapter sequence to be trimmed off read 2 of paired-end files." - echo " This option requires '--paired' to be specified as well. If the" - echo " libraries to be trimmed are smallRNA then a2 will be set to the Illumina" - echo " small RNA 5' adapter automatically (GATCGTCGGACT). A single base may" - echo " also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA." - echo "" - echo " --illumina" - echo " type: boolean" - echo " Adapter sequence to be trimmed is the first 13bp of the Illumina" - echo " universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection" - echo " of adapter sequence." - echo "" - echo " --stranded_illumina" - echo " type: boolean" - echo " Adapter sequence to be trimmed is the first 13bp of the Illumina" - echo " stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the" - echo " default auto-detection of adapter sequence." - echo "" - echo " --nextera" - echo " type: boolean" - echo " Adapter sequence to be trimmed is the first 12bp of the Nextera adapter" - echo " 'CTGTCTCTTATA' instead of the default auto-detection of adapter" - echo " sequence." - echo "" - echo " --small_rna" - echo " type: boolean" - echo " Adapter sequence to be trimmed is the first 12bp of the Illumina Small" - echo " RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of" - echo " adapter sequence. 
Selecting to trim smallRNA adapters will also lower" - echo " the --length value to 18bp. If the smallRNA libraries are paired-end" - echo " then a automatically (GATCGTCGGACT) unless -a 2 had been defined" - echo " explicitly." - echo "" - echo " --consider_already_trimmed" - echo " type: integer" - echo " During adapter auto-detection, the limit set by this argument allows the" - echo " user to set a threshold up to which the file is considered already" - echo " adapter-trimmed. If no adapter sequence exceeds this threshold, no" - echo " additional adapter trimming will be performed (technically, the adapter" - echo " is set to '-a X'). Quality trimming is still performed as usual." - echo "" - echo " --max_length" - echo " type: integer" - echo " Discard reads that are longer than the specified value after trimming." - echo " This is only advised for smallRNA sequencing to remove non-small RNA" - echo " sequences." - echo "" - echo " --stringency" - echo " type: integer" - echo " example: 1" - echo " Overlap with adapter sequence required to trim a sequence. Defaults to a" - echo " very stringent setting of 1, i.e. even a single bp of overlapping" - echo " sequence will be trimmed off from the 3' end of any read." - echo "" - echo " -e, --error_rate" - echo " type: double" - echo " example: 0.1" - echo " Maximum allowed error rate (no. of errors divided by the length of the" - echo " matching region)" - echo "" - echo " --gzip" - echo " type: boolean" - echo " Compress the output file with GZIP. If the input files are" - echo " GZIP-compressed the output files will automatically be GZIP compressed" - echo " as well. As of v0.2.8 the compression will take place on the fly." - echo "" - echo " --dont_gzip" - echo " type: boolean" - echo " Output files won't be compressed with GZIP. This option overrides" - echo " --gzip." 
- echo "" - echo " --length" - echo " type: integer" - echo " example: 20" - echo " Discard reads that became shorter than the specified length because of" - echo " either quality or adapter trimming. A value of '0' effectively disables" - echo " this behaviour. For paired-end files, both reads of a read-pair need to" - echo " be longer than the specified length to be printed out to validated" - echo " paired-end files. If only one read became too short there is the" - echo " possibility of keeping such unpaired single-end reads using the" - echo " --retain_unpaired option." - echo "" - echo " --max_n" - echo " type: integer" - echo " The total number of Ns a read may contain before it will be removed" - echo " altogether.In a paired-end setting, either read exceeding this limit" - echo " will result in the entire pair being removed from the trimmed output" - echo " files. If COUNT is a number between 0 and 1, it is interpreted as a" - echo " fraction of the read length." - echo "" - echo " --trim_n" - echo " type: boolean" - echo " Removes Ns from either side of the read. This option does currently not" - echo " work in RRBS mode." - echo "" - echo " --no_report_file" - echo " type: boolean" - echo " If specified no report file will be generated." - echo "" - echo " --suppress_warn" - echo " type: boolean" - echo " If specified any output to STDOUT or STDERR will be suppressed." - echo "" - echo " --clip_R1" - echo " type: integer" - echo " Instructs TrimGalore to remove given number of bp from the 5' end of" - echo " read 1 (or single-end reads). This may be useful if the qualities were" - echo " very poor, or if there is some sort of unwanted bias at the 5' end." - echo "" - echo " --clip_R2" - echo " type: integer" - echo " Instructs TrimGalore to remove given number bp from the 5' end of read 2" - echo " (paired-end reads only). This may be useful if the qualities were very" - echo " poor, or if there is some sort of unwanted bias at the 5' end. 
For" - echo " paired-end BS-Seq, it is recommended to remove the first few bp because" - echo " the end-repair reaction may introduce a bias towards low methylation." - echo "" - echo " --three_prime_clip_R1" - echo " type: integer" - echo " Instructs Trim Galore to remove spacified number of bp from the 3' end" - echo " of read 1 (or single-end reads) AFTER adapter/quality trimming has been" - echo " performed. This may remove some bias from the 3' end that is not" - echo " directly related to adapter sequence or basecall quality." - echo "" - echo " --three_prime_clip_R2" - echo " type: integer" - echo " Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER" - echo " adapter/quality trimming has been performed. This may remove some" - echo " unwanted bias from the 3' end that is not directly related to adapter" - echo " sequence or basecall quality." - echo "" - echo " --nextseq" - echo " type: integer" - echo " This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which" - echo " will set a quality cutoff (that is normally given with -q instead), but" - echo " qualities of G bases are ignored. This trimming is in common for the" - echo " NextSeq- and NovaSeq-platforms, where basecalls without any signal are" - echo " called as high-quality G bases. This is mutually exlusive with '-q INT'." - echo "" - echo " --basename" - echo " type: string" - echo " Use specified name (PREFERRED_NAME) as the basename for output files," - echo " instead of deriving the filenames from the input files. Single-end data" - echo " would be called PREFERRED_NAME_trimmed.fq(.gz), or" - echo " PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for" - echo " paired-end data. --basename only works when 1 file (single-end) or 2" - echo " files (paired-end) are specified, but not for longer lists." 
- echo "" - echo " -j, --cores" - echo " type: integer" - echo " example: 1" - echo " Number of cores to be used for trimming" - echo "" - echo "Specific trimming options without adapter/quality trimming:" - echo " --hardtrim5" - echo " type: integer" - echo " Instead of performing adapter-/quality trimming, this option will simply" - echo " hard-trim sequences to bp at the 5'-end. Once hard-trimming of" - echo " files is complete, Trim Galore will exit. Hard-trimmed output files will" - echo " end in ._5prime.fq(.gz)." - echo "" - echo " --hardtrim3" - echo " type: integer" - echo " Instead of performing adapter-/quality trimming, this option will simply" - echo " hard-trim sequences to bp at the 3'-end. Once hard-trimming of" - echo " files is complete, Trim Galore will exit. Hard-trimmed output files will" - echo " end in ._3prime.fq(.gz)." - echo "" - echo " --clock" - echo " type: boolean" - echo " In this mode, reads are trimmed in a specific way that is currently used" - echo " for the Mouse Epigenetic Clock." - echo "" - echo " --polyA" - echo " type: boolean" - echo " This is a new, still experimental, trimming mode to identify and remove" - echo " poly-A tails from sequences. When --polyA is selected, Trim Galore" - echo " attempts to identify from the first supplied sample whether sequences" - echo " contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'." - echo " This determines if Read 1 of a paired-end end file, or single-end files," - echo " are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2" - echo " is trimmed for the complementary base from the start of the reads. The" - echo " auto-detection uses a default of A{20} for Read1 (3'-end trimming) and" - echo " T{150} for Read2 (5'-end trimming). These values may be changed manually" - echo " using the options -a and -a2. 
In addition to trimming the sequences," - echo " white spaces are replaced with _ and it records in the read ID how many" - echo " bases were trimmed so it can later be used to identify PolyA trimmed" - echo " sequences. This is currently done by writing tags to both the start" - echo " (\"32:A:\") and end (\"_PolyA:32\") of the reads. The poly-A trimming mode" - echo " expects that sequences were both adapter and quality before looking for" - echo " Poly-A tails, and it is the user's responsibility to carry out an" - echo " initial round of trimming." - echo "" - echo " --implicon" - echo " type: boolean" - echo " This is a special mode of operation for paired-end data, such as" - echo " required for the IMPLICON method, where a UMI sequence is getting" - echo " transferred from the start of Read 2 to the readID of both reads." - echo " Following this, Trim Galore will exit. In it's current implementation," - echo " the UMI carrying reads come in the following format" - echo " Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'" - echo " Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'" - echo " Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and" - echo " FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2" - echo " (R2) is written into the read ID of both reads and removed from the" - echo " actual sequence." - echo "" - echo "RRBS-specific options:" - echo " --rrbs" - echo " type: boolean" - echo " Specifies that the input file was an MspI digested RRBS sample" - echo " (recognition site is CCGG). Single-end or Read 1 sequences (paired-end)" - echo " which were adapter-trimmed will have a further 2 bp removed from their" - echo " 3' end. Sequences which were merely trimmed because of poor quality will" - echo " not be shortened further. Read 2 of paired-end libraries will in" - echo " addition have the first 2 bp removed from the 5' end (by setting" - echo " '--clip_r2 2'). 
This is to avoid using artificial methylation calls from" - echo " the filled-in cytosine positions close to the 3' MspI site in sequenced" - echo " fragments. This option is not recommended for users of the Tecan Ovation" - echo " RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below)." - echo "" - echo " --non_directional" - echo " type: boolean" - echo " Selecting this option for non-directional RRBS libraries will screen" - echo " quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read" - echo " and, if found, removes the first two basepairs. Like with the option" - echo " '--rrbs' this avoids using cytosine positions that were filled-in during" - echo " the end-repair step. '--non_directional' requires '--rrbs' to be" - echo " specified as well. Note that this option does not set '--clip_r2 2' in" - echo " paired-end mode." - echo "" - echo " --keep" - echo " type: boolean" - echo " Keep the quality trimmed intermediate file." - echo "" - echo "Paired-end specific options:" - echo " --paired" - echo " type: boolean" - echo " This option performs length trimming of quality/adapter/RRBS trimmed" - echo " reads for paired-end files. To pass the validation test, both sequences" - echo " of a sequence pair are required to have a certain minimum length which" - echo " is governed by the option --length (see above). If only one read passes" - echo " this length threshold the other read can be rescued (see option" - echo " --retain_unpaired). Using this option lets you discard too short read" - echo " pairs without disturbing the sequence-by-sequence order of FastQ files" - echo " which is required by many aligners. Trim Galore expects paired-end files" - echo " to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq" - echo " SRR2_1.fq.gz SRR2_2.fq.gz ... ." 
- echo "" - echo " --retain_unpaired" - echo " type: boolean" - echo " If only one of the two paired-end reads became too short, the longer" - echo " read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq'" - echo " output files. The length cutoff for unpaired single-end reads is" - echo " governed by the parameters -r1/--length_1 and -r2/--length_2." - echo "" - echo " -r1, --length_1" - echo " type: integer" - echo " example: 35" - echo " Unpaired single-end read length cutoff needed for read 1 to be written" - echo " to '.unpaired_1.fq' output file. These reads may be mapped in single-end" - echo " mode." - echo "" - echo " -r2, --length_2" - echo " type: integer" - echo " example: 35" - echo " Unpaired single-end read length cutoff needed for read 2 to be written" - echo " to '.unpaired_2.fq' output file. These reads may be mapped in single-end" - echo " mode." - echo "" - echo "Output:" - echo " -o, --output_dir" - echo " type: file, output, file must exist" - echo " default: trimmed_output" - echo " If specified all output will be written to this directory instead of the" - echo " current directory." - echo "" - echo " --trimmed_r1" - echo " type: file, output, file must exist" - echo " example: read_1.fastq.gz" - echo " Output file for read 1. Only works when 1 file (single-end) or 2 files" - echo " (paired-end) are specified, but not for longer lists." - echo "" - echo " --trimmed_r2" - echo " type: file, output, file must exist" - echo " example: read_2.fastq.gz" - echo " Output file for read 2. Only works when 1 file (single-end) or 2 files" - echo " (paired-end) are specified, but not for longer lists." - echo "" - echo " --trimming_report_r1" - echo " type: file, output, file must exist" - echo " example: read_1.trimming_report.txt" - echo " Trimming report for read 1. Only works when 1 file (single-end) or 2" - echo " files (paired-end) are specified, but not for longer lists." 
- echo "" - echo " --trimming_report_r2" - echo " type: file, output, file must exist" - echo " example: read_2.trimming_report.txt" - echo " Trimming report for read 1. Only works when 1 file (single-end) or 2" - echo " files (paired-end) are specified, but not for longer lists." - echo "" - echo " --trimmed_fastqc_html_1" - echo " type: file, output, file must exist" - echo " example: read_1.fastqc.html" - echo " FastQC report for trimmed (single-end) reads (or read 1 for paired-end)." - echo " Only works when 1 file (single-end) or 2 files (paired-end) are" - echo " specified, but not for longer lists." - echo "" - echo " --trimmed_fastqc_html_2" - echo " type: file, output, file must exist" - echo " example: read_2.fastqc.html" - echo " FastQC report for trimmed reads (read2 for paired-end). Only works when" - echo " 1 file (single-end) or 2 files (paired-end) are specified, but not for" - echo " longer lists." - echo "" - echo " --trimmed_fastqc_zip_1" - echo " type: file, output, file must exist" - echo " example: read_1.fastqc.zip" - echo " FastQC results for trimmed (single-end) reads (or read 1 for" - echo " paired-end). Only works when 1 file (single-end) or 2 files (paired-end)" - echo " are specified, but not for longer lists." - echo "" - echo " --trimmed_fastqc_zip_2" - echo " type: file, output, file must exist" - echo " example: read_2.fastqc.zip" - echo " FastQC results for trimmed reads (read2 for paired-end). Only works when" - echo " 1 file (single-end) or 2 files (paired-end) are specified, but not for" - echo " longer lists." - echo "" - echo " --unpaired_r1" - echo " type: file, output, file must exist" - echo " example: unpaired_read_1.fastq" - echo " Output file for unpired read 1. Only works when 1 file (single-end) or 2" - echo " files (paired-end) are specified, but not for longer lists." 
- echo "" - echo " --unpaired_r2" - echo " type: file, output, file must exist" - echo " example: unpaired_read_2.fastq" - echo " Output file for unpaired read 2. Only works when 1 file (single-end) or" - echo " 2 files (paired-end) are specified, but not for longer lists." -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." 
- fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0 -ENTRYPOINT [] -RUN echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt - -LABEL org.opencontainers.image.description="Companion container for running component trimgalore" -LABEL 
org.opencontainers.image.created="2024-11-27T08:42:32Z" -LABEL org.opencontainers.image.source="https://github.com/FelixKrueger/TrimGalore" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "trimgalore main" - exit - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --quality) - [ -n "$VIASH_PAR_QUALITY" ] && ViashError Bad arguments for option \'--quality\': \'$VIASH_PAR_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --quality. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --quality=*) - [ -n "$VIASH_PAR_QUALITY" ] && ViashError Bad arguments for option \'--quality=*\': \'$VIASH_PAR_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALITY=$(ViashRemoveFlags "$1") - shift 1 - ;; - -q) - [ -n "$VIASH_PAR_QUALITY" ] && ViashError Bad arguments for option \'-q\': \'$VIASH_PAR_QUALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALITY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -q. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --phred33) - [ -n "$VIASH_PAR_PHRED33" ] && ViashError Bad arguments for option \'--phred33\': \'$VIASH_PAR_PHRED33\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PHRED33="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --phred33. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --phred33=*) - [ -n "$VIASH_PAR_PHRED33" ] && ViashError Bad arguments for option \'--phred33=*\': \'$VIASH_PAR_PHRED33\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PHRED33=$(ViashRemoveFlags "$1") - shift 1 - ;; - --phred64) - [ -n "$VIASH_PAR_PHRED64" ] && ViashError Bad arguments for option \'--phred64\': \'$VIASH_PAR_PHRED64\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PHRED64="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --phred64. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --phred64=*) - [ -n "$VIASH_PAR_PHRED64" ] && ViashError Bad arguments for option \'--phred64=*\': \'$VIASH_PAR_PHRED64\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PHRED64=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastqc) - [ -n "$VIASH_PAR_FASTQC" ] && ViashError Bad arguments for option \'--fastqc\': \'$VIASH_PAR_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastqc=*) - [ -n "$VIASH_PAR_FASTQC" ] && ViashError Bad arguments for option \'--fastqc=*\': \'$VIASH_PAR_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_FASTQC=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastqc_args) - [ -n "$VIASH_PAR_FASTQC_ARGS" ] && ViashError Bad arguments for option \'--fastqc_args\': \'$VIASH_PAR_FASTQC_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastqc_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastqc_args=*) - [ -n "$VIASH_PAR_FASTQC_ARGS" ] && ViashError Bad arguments for option \'--fastqc_args=*\': \'$VIASH_PAR_FASTQC_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQC_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --adapter) - [ -n "$VIASH_PAR_ADAPTER" ] && ViashError Bad arguments for option \'--adapter\': \'$VIASH_PAR_ADAPTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADAPTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --adapter. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --adapter=*) - [ -n "$VIASH_PAR_ADAPTER" ] && ViashError Bad arguments for option \'--adapter=*\': \'$VIASH_PAR_ADAPTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADAPTER=$(ViashRemoveFlags "$1") - shift 1 - ;; - -a) - [ -n "$VIASH_PAR_ADAPTER" ] && ViashError Bad arguments for option \'-a\': \'$VIASH_PAR_ADAPTER\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADAPTER="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -a. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --adapter2) - [ -n "$VIASH_PAR_ADAPTER2" ] && ViashError Bad arguments for option \'--adapter2\': \'$VIASH_PAR_ADAPTER2\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_ADAPTER2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --adapter2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --adapter2=*) - [ -n "$VIASH_PAR_ADAPTER2" ] && ViashError Bad arguments for option \'--adapter2=*\': \'$VIASH_PAR_ADAPTER2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADAPTER2=$(ViashRemoveFlags "$1") - shift 1 - ;; - -a2) - [ -n "$VIASH_PAR_ADAPTER2" ] && ViashError Bad arguments for option \'-a2\': \'$VIASH_PAR_ADAPTER2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ADAPTER2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -a2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --illumina) - [ -n "$VIASH_PAR_ILLUMINA" ] && ViashError Bad arguments for option \'--illumina\': \'$VIASH_PAR_ILLUMINA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ILLUMINA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --illumina. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --illumina=*) - [ -n "$VIASH_PAR_ILLUMINA" ] && ViashError Bad arguments for option \'--illumina=*\': \'$VIASH_PAR_ILLUMINA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ILLUMINA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --stranded_illumina) - [ -n "$VIASH_PAR_STRANDED_ILLUMINA" ] && ViashError Bad arguments for option \'--stranded_illumina\': \'$VIASH_PAR_STRANDED_ILLUMINA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED_ILLUMINA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --stranded_illumina. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --stranded_illumina=*) - [ -n "$VIASH_PAR_STRANDED_ILLUMINA" ] && ViashError Bad arguments for option \'--stranded_illumina=*\': \'$VIASH_PAR_STRANDED_ILLUMINA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRANDED_ILLUMINA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --nextera) - [ -n "$VIASH_PAR_NEXTERA" ] && ViashError Bad arguments for option \'--nextera\': \'$VIASH_PAR_NEXTERA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEXTERA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --nextera. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --nextera=*) - [ -n "$VIASH_PAR_NEXTERA" ] && ViashError Bad arguments for option \'--nextera=*\': \'$VIASH_PAR_NEXTERA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEXTERA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --small_rna) - [ -n "$VIASH_PAR_SMALL_RNA" ] && ViashError Bad arguments for option \'--small_rna\': \'$VIASH_PAR_SMALL_RNA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SMALL_RNA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --small_rna. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --small_rna=*) - [ -n "$VIASH_PAR_SMALL_RNA" ] && ViashError Bad arguments for option \'--small_rna=*\': \'$VIASH_PAR_SMALL_RNA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SMALL_RNA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --consider_already_trimmed) - [ -n "$VIASH_PAR_CONSIDER_ALREADY_TRIMMED" ] && ViashError Bad arguments for option \'--consider_already_trimmed\': \'$VIASH_PAR_CONSIDER_ALREADY_TRIMMED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CONSIDER_ALREADY_TRIMMED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --consider_already_trimmed. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --consider_already_trimmed=*) - [ -n "$VIASH_PAR_CONSIDER_ALREADY_TRIMMED" ] && ViashError Bad arguments for option \'--consider_already_trimmed=*\': \'$VIASH_PAR_CONSIDER_ALREADY_TRIMMED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CONSIDER_ALREADY_TRIMMED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_length) - [ -n "$VIASH_PAR_MAX_LENGTH" ] && ViashError Bad arguments for option \'--max_length\': \'$VIASH_PAR_MAX_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_LENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_length. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_length=*) - [ -n "$VIASH_PAR_MAX_LENGTH" ] && ViashError Bad arguments for option \'--max_length=*\': \'$VIASH_PAR_MAX_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_LENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --stringency) - [ -n "$VIASH_PAR_STRINGENCY" ] && ViashError Bad arguments for option \'--stringency\': \'$VIASH_PAR_STRINGENCY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_STRINGENCY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --stringency. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --stringency=*) - [ -n "$VIASH_PAR_STRINGENCY" ] && ViashError Bad arguments for option \'--stringency=*\': \'$VIASH_PAR_STRINGENCY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_STRINGENCY=$(ViashRemoveFlags "$1") - shift 1 - ;; - --error_rate) - [ -n "$VIASH_PAR_ERROR_RATE" ] && ViashError Bad arguments for option \'--error_rate\': \'$VIASH_PAR_ERROR_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ERROR_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --error_rate. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --error_rate=*) - [ -n "$VIASH_PAR_ERROR_RATE" ] && ViashError Bad arguments for option \'--error_rate=*\': \'$VIASH_PAR_ERROR_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ERROR_RATE=$(ViashRemoveFlags "$1") - shift 1 - ;; - -e) - [ -n "$VIASH_PAR_ERROR_RATE" ] && ViashError Bad arguments for option \'-e\': \'$VIASH_PAR_ERROR_RATE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_ERROR_RATE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -e. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gzip) - [ -n "$VIASH_PAR_GZIP" ] && ViashError Bad arguments for option \'--gzip\': \'$VIASH_PAR_GZIP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GZIP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --gzip. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --gzip=*) - [ -n "$VIASH_PAR_GZIP" ] && ViashError Bad arguments for option \'--gzip=*\': \'$VIASH_PAR_GZIP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GZIP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --dont_gzip) - [ -n "$VIASH_PAR_DONT_GZIP" ] && ViashError Bad arguments for option \'--dont_gzip\': \'$VIASH_PAR_DONT_GZIP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_DONT_GZIP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --dont_gzip. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --dont_gzip=*) - [ -n "$VIASH_PAR_DONT_GZIP" ] && ViashError Bad arguments for option \'--dont_gzip=*\': \'$VIASH_PAR_DONT_GZIP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DONT_GZIP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --length) - [ -n "$VIASH_PAR_LENGTH" ] && ViashError Bad arguments for option \'--length\': \'$VIASH_PAR_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --length. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --length=*) - [ -n "$VIASH_PAR_LENGTH" ] && ViashError Bad arguments for option \'--length=*\': \'$VIASH_PAR_LENGTH\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_n) - [ -n "$VIASH_PAR_MAX_N" ] && ViashError Bad arguments for option \'--max_n\': \'$VIASH_PAR_MAX_N\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_N="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_n. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_n=*) - [ -n "$VIASH_PAR_MAX_N" ] && ViashError Bad arguments for option \'--max_n=*\': \'$VIASH_PAR_MAX_N\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_N=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trim_n) - [ -n "$VIASH_PAR_TRIM_N" ] && ViashError Bad arguments for option \'--trim_n\': \'$VIASH_PAR_TRIM_N\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIM_N="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trim_n. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trim_n=*) - [ -n "$VIASH_PAR_TRIM_N" ] && ViashError Bad arguments for option \'--trim_n=*\': \'$VIASH_PAR_TRIM_N\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIM_N=$(ViashRemoveFlags "$1") - shift 1 - ;; - --no_report_file) - [ -n "$VIASH_PAR_NO_REPORT_FILE" ] && ViashError Bad arguments for option \'--no_report_file\': \'$VIASH_PAR_NO_REPORT_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NO_REPORT_FILE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --no_report_file. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --no_report_file=*) - [ -n "$VIASH_PAR_NO_REPORT_FILE" ] && ViashError Bad arguments for option \'--no_report_file=*\': \'$VIASH_PAR_NO_REPORT_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NO_REPORT_FILE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --suppress_warn) - [ -n "$VIASH_PAR_SUPPRESS_WARN" ] && ViashError Bad arguments for option \'--suppress_warn\': \'$VIASH_PAR_SUPPRESS_WARN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUPPRESS_WARN="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --suppress_warn. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --suppress_warn=*) - [ -n "$VIASH_PAR_SUPPRESS_WARN" ] && ViashError Bad arguments for option \'--suppress_warn=*\': \'$VIASH_PAR_SUPPRESS_WARN\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SUPPRESS_WARN=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clip_R1) - [ -n "$VIASH_PAR_CLIP_R1" ] && ViashError Bad arguments for option \'--clip_R1\': \'$VIASH_PAR_CLIP_R1\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_CLIP_R1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip_R1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip_R1=*) - [ -n "$VIASH_PAR_CLIP_R1" ] && ViashError Bad arguments for option \'--clip_R1=*\': \'$VIASH_PAR_CLIP_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIP_R1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clip_R2) - [ -n "$VIASH_PAR_CLIP_R2" ] && ViashError Bad arguments for option \'--clip_R2\': \'$VIASH_PAR_CLIP_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIP_R2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clip_R2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clip_R2=*) - [ -n "$VIASH_PAR_CLIP_R2" ] && ViashError Bad arguments for option \'--clip_R2=*\': \'$VIASH_PAR_CLIP_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLIP_R2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --three_prime_clip_R1) - [ -n "$VIASH_PAR_THREE_PRIME_CLIP_R1" ] && ViashError Bad arguments for option \'--three_prime_clip_R1\': \'$VIASH_PAR_THREE_PRIME_CLIP_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREE_PRIME_CLIP_R1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --three_prime_clip_R1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --three_prime_clip_R1=*) - [ -n "$VIASH_PAR_THREE_PRIME_CLIP_R1" ] && ViashError Bad arguments for option \'--three_prime_clip_R1=*\': \'$VIASH_PAR_THREE_PRIME_CLIP_R1\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_THREE_PRIME_CLIP_R1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --three_prime_clip_R2) - [ -n "$VIASH_PAR_THREE_PRIME_CLIP_R2" ] && ViashError Bad arguments for option \'--three_prime_clip_R2\': \'$VIASH_PAR_THREE_PRIME_CLIP_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREE_PRIME_CLIP_R2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --three_prime_clip_R2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --three_prime_clip_R2=*) - [ -n "$VIASH_PAR_THREE_PRIME_CLIP_R2" ] && ViashError Bad arguments for option \'--three_prime_clip_R2=*\': \'$VIASH_PAR_THREE_PRIME_CLIP_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_THREE_PRIME_CLIP_R2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --nextseq) - [ -n "$VIASH_PAR_NEXTSEQ" ] && ViashError Bad arguments for option \'--nextseq\': \'$VIASH_PAR_NEXTSEQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEXTSEQ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --nextseq. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --nextseq=*) - [ -n "$VIASH_PAR_NEXTSEQ" ] && ViashError Bad arguments for option \'--nextseq=*\': \'$VIASH_PAR_NEXTSEQ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NEXTSEQ=$(ViashRemoveFlags "$1") - shift 1 - ;; - --basename) - [ -n "$VIASH_PAR_BASENAME" ] && ViashError Bad arguments for option \'--basename\': \'$VIASH_PAR_BASENAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BASENAME="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --basename. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --basename=*) - [ -n "$VIASH_PAR_BASENAME" ] && ViashError Bad arguments for option \'--basename=*\': \'$VIASH_PAR_BASENAME\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BASENAME=$(ViashRemoveFlags "$1") - shift 1 - ;; - --cores) - [ -n "$VIASH_PAR_CORES" ] && ViashError Bad arguments for option \'--cores\': \'$VIASH_PAR_CORES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CORES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --cores. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --cores=*) - [ -n "$VIASH_PAR_CORES" ] && ViashError Bad arguments for option \'--cores=*\': \'$VIASH_PAR_CORES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CORES=$(ViashRemoveFlags "$1") - shift 1 - ;; - -j) - [ -n "$VIASH_PAR_CORES" ] && ViashError Bad arguments for option \'-j\': \'$VIASH_PAR_CORES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CORES="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -j. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --hardtrim5) - [ -n "$VIASH_PAR_HARDTRIM5" ] && ViashError Bad arguments for option \'--hardtrim5\': \'$VIASH_PAR_HARDTRIM5\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_HARDTRIM5="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --hardtrim5. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --hardtrim5=*) - [ -n "$VIASH_PAR_HARDTRIM5" ] && ViashError Bad arguments for option \'--hardtrim5=*\': \'$VIASH_PAR_HARDTRIM5\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_HARDTRIM5=$(ViashRemoveFlags "$1") - shift 1 - ;; - --hardtrim3) - [ -n "$VIASH_PAR_HARDTRIM3" ] && ViashError Bad arguments for option \'--hardtrim3\': \'$VIASH_PAR_HARDTRIM3\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_HARDTRIM3="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --hardtrim3. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --hardtrim3=*) - [ -n "$VIASH_PAR_HARDTRIM3" ] && ViashError Bad arguments for option \'--hardtrim3=*\': \'$VIASH_PAR_HARDTRIM3\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_HARDTRIM3=$(ViashRemoveFlags "$1") - shift 1 - ;; - --clock) - [ -n "$VIASH_PAR_CLOCK" ] && ViashError Bad arguments for option \'--clock\': \'$VIASH_PAR_CLOCK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLOCK="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --clock. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --clock=*) - [ -n "$VIASH_PAR_CLOCK" ] && ViashError Bad arguments for option \'--clock=*\': \'$VIASH_PAR_CLOCK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_CLOCK=$(ViashRemoveFlags "$1") - shift 1 - ;; - --polyA) - [ -n "$VIASH_PAR_POLYA" ] && ViashError Bad arguments for option \'--polyA\': \'$VIASH_PAR_POLYA\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_POLYA="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --polyA. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --polyA=*) - [ -n "$VIASH_PAR_POLYA" ] && ViashError Bad arguments for option \'--polyA=*\': \'$VIASH_PAR_POLYA\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_POLYA=$(ViashRemoveFlags "$1") - shift 1 - ;; - --implicon) - [ -n "$VIASH_PAR_IMPLICON" ] && ViashError Bad arguments for option \'--implicon\': \'$VIASH_PAR_IMPLICON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_IMPLICON="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --implicon. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --implicon=*) - [ -n "$VIASH_PAR_IMPLICON" ] && ViashError Bad arguments for option \'--implicon=*\': \'$VIASH_PAR_IMPLICON\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_IMPLICON=$(ViashRemoveFlags "$1") - shift 1 - ;; - --rrbs) - [ -n "$VIASH_PAR_RRBS" ] && ViashError Bad arguments for option \'--rrbs\': \'$VIASH_PAR_RRBS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RRBS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --rrbs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --rrbs=*) - [ -n "$VIASH_PAR_RRBS" ] && ViashError Bad arguments for option \'--rrbs=*\': \'$VIASH_PAR_RRBS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RRBS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --non_directional) - [ -n "$VIASH_PAR_NON_DIRECTIONAL" ] && ViashError Bad arguments for option \'--non_directional\': \'$VIASH_PAR_NON_DIRECTIONAL\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_NON_DIRECTIONAL="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --non_directional. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --non_directional=*) - [ -n "$VIASH_PAR_NON_DIRECTIONAL" ] && ViashError Bad arguments for option \'--non_directional=*\': \'$VIASH_PAR_NON_DIRECTIONAL\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_NON_DIRECTIONAL=$(ViashRemoveFlags "$1") - shift 1 - ;; - --keep) - [ -n "$VIASH_PAR_KEEP" ] && ViashError Bad arguments for option \'--keep\': \'$VIASH_PAR_KEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KEEP="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --keep. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --keep=*) - [ -n "$VIASH_PAR_KEEP" ] && ViashError Bad arguments for option \'--keep=*\': \'$VIASH_PAR_KEEP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_KEEP=$(ViashRemoveFlags "$1") - shift 1 - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --retain_unpaired) - [ -n "$VIASH_PAR_RETAIN_UNPAIRED" ] && ViashError Bad arguments for option \'--retain_unpaired\': \'$VIASH_PAR_RETAIN_UNPAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_RETAIN_UNPAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --retain_unpaired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --retain_unpaired=*) - [ -n "$VIASH_PAR_RETAIN_UNPAIRED" ] && ViashError Bad arguments for option \'--retain_unpaired=*\': \'$VIASH_PAR_RETAIN_UNPAIRED\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_RETAIN_UNPAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --length_1) - [ -n "$VIASH_PAR_LENGTH_1" ] && ViashError Bad arguments for option \'--length_1\': \'$VIASH_PAR_LENGTH_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --length_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --length_1=*) - [ -n "$VIASH_PAR_LENGTH_1" ] && ViashError Bad arguments for option \'--length_1=*\': \'$VIASH_PAR_LENGTH_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r1) - [ -n "$VIASH_PAR_LENGTH_1" ] && ViashError Bad arguments for option \'-r1\': \'$VIASH_PAR_LENGTH_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --length_2) - [ -n "$VIASH_PAR_LENGTH_2" ] && ViashError Bad arguments for option \'--length_2\': \'$VIASH_PAR_LENGTH_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --length_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --length_2=*) - [ -n "$VIASH_PAR_LENGTH_2" ] && ViashError Bad arguments for option \'--length_2=*\': \'$VIASH_PAR_LENGTH_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LENGTH_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - -r2) - [ -n "$VIASH_PAR_LENGTH_2" ] && ViashError Bad arguments for option \'-r2\': \'$VIASH_PAR_LENGTH_2\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_LENGTH_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -r2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_dir) - [ -n "$VIASH_PAR_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--output_dir\': \'$VIASH_PAR_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_dir. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_dir=*) - [ -n "$VIASH_PAR_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--output_dir=*\': \'$VIASH_PAR_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIR=$(ViashRemoveFlags "$1") - shift 1 - ;; - -o) - [ -n "$VIASH_PAR_OUTPUT_DIR" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_r1) - [ -n "$VIASH_PAR_TRIMMED_R1" ] && ViashError Bad arguments for option \'--trimmed_r1\': \'$VIASH_PAR_TRIMMED_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_R1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_r1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_r1=*) - [ -n "$VIASH_PAR_TRIMMED_R1" ] && ViashError Bad arguments for option \'--trimmed_r1=*\': \'$VIASH_PAR_TRIMMED_R1\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TRIMMED_R1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimmed_r2) - [ -n "$VIASH_PAR_TRIMMED_R2" ] && ViashError Bad arguments for option \'--trimmed_r2\': \'$VIASH_PAR_TRIMMED_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_R2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_r2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_r2=*) - [ -n "$VIASH_PAR_TRIMMED_R2" ] && ViashError Bad arguments for option \'--trimmed_r2=*\': \'$VIASH_PAR_TRIMMED_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_R2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimming_report_r1) - [ -n "$VIASH_PAR_TRIMMING_REPORT_R1" ] && ViashError Bad arguments for option \'--trimming_report_r1\': \'$VIASH_PAR_TRIMMING_REPORT_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMING_REPORT_R1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimming_report_r1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimming_report_r1=*) - [ -n "$VIASH_PAR_TRIMMING_REPORT_R1" ] && ViashError Bad arguments for option \'--trimming_report_r1=*\': \'$VIASH_PAR_TRIMMING_REPORT_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMING_REPORT_R1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimming_report_r2) - [ -n "$VIASH_PAR_TRIMMING_REPORT_R2" ] && ViashError Bad arguments for option \'--trimming_report_r2\': \'$VIASH_PAR_TRIMMING_REPORT_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMING_REPORT_R2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimming_report_r2. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --trimming_report_r2=*) - [ -n "$VIASH_PAR_TRIMMING_REPORT_R2" ] && ViashError Bad arguments for option \'--trimming_report_r2=*\': \'$VIASH_PAR_TRIMMING_REPORT_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMING_REPORT_R2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimmed_fastqc_html_1) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--trimmed_fastqc_html_1\': \'$VIASH_PAR_TRIMMED_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_HTML_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_fastqc_html_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_fastqc_html_1=*) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ] && ViashError Bad arguments for option \'--trimmed_fastqc_html_1=*\': \'$VIASH_PAR_TRIMMED_FASTQC_HTML_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_HTML_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimmed_fastqc_html_2) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--trimmed_fastqc_html_2\': \'$VIASH_PAR_TRIMMED_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_HTML_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_fastqc_html_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_fastqc_html_2=*) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ] && ViashError Bad arguments for option \'--trimmed_fastqc_html_2=*\': \'$VIASH_PAR_TRIMMED_FASTQC_HTML_2\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_TRIMMED_FASTQC_HTML_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimmed_fastqc_zip_1) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--trimmed_fastqc_zip_1\': \'$VIASH_PAR_TRIMMED_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_ZIP_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_fastqc_zip_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_fastqc_zip_1=*) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ] && ViashError Bad arguments for option \'--trimmed_fastqc_zip_1=*\': \'$VIASH_PAR_TRIMMED_FASTQC_ZIP_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_ZIP_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --trimmed_fastqc_zip_2) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--trimmed_fastqc_zip_2\': \'$VIASH_PAR_TRIMMED_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_ZIP_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --trimmed_fastqc_zip_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --trimmed_fastqc_zip_2=*) - [ -n "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ] && ViashError Bad arguments for option \'--trimmed_fastqc_zip_2=*\': \'$VIASH_PAR_TRIMMED_FASTQC_ZIP_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_TRIMMED_FASTQC_ZIP_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --unpaired_r1) - [ -n "$VIASH_PAR_UNPAIRED_R1" ] && ViashError Bad arguments for option \'--unpaired_r1\': \'$VIASH_PAR_UNPAIRED_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNPAIRED_R1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --unpaired_r1. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --unpaired_r1=*) - [ -n "$VIASH_PAR_UNPAIRED_R1" ] && ViashError Bad arguments for option \'--unpaired_r1=*\': \'$VIASH_PAR_UNPAIRED_R1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNPAIRED_R1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --unpaired_r2) - [ -n "$VIASH_PAR_UNPAIRED_R2" ] && ViashError Bad arguments for option \'--unpaired_r2\': \'$VIASH_PAR_UNPAIRED_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNPAIRED_R2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --unpaired_r2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --unpaired_r2=*) - [ -n "$VIASH_PAR_UNPAIRED_R2" ] && ViashError Bad arguments for option \'--unpaired_r2=*\': \'$VIASH_PAR_UNPAIRED_R2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UNPAIRED_R2=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/trimgalore:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( 
$number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then - VIASH_PAR_OUTPUT_DIR="trimmed_output" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=';' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_QUALITY" ]]; then - if ! [[ "$VIASH_PAR_QUALITY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--quality' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PHRED33" ]]; then - if ! [[ "$VIASH_PAR_PHRED33" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--phred33' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PHRED64" ]]; then - if ! [[ "$VIASH_PAR_PHRED64" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--phred64' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_FASTQC" ]]; then - if ! [[ "$VIASH_PAR_FASTQC" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--fastqc' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ILLUMINA" ]]; then - if ! 
[[ "$VIASH_PAR_ILLUMINA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--illumina' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_STRANDED_ILLUMINA" ]]; then - if ! [[ "$VIASH_PAR_STRANDED_ILLUMINA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--stranded_illumina' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NEXTERA" ]]; then - if ! [[ "$VIASH_PAR_NEXTERA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--nextera' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SMALL_RNA" ]]; then - if ! [[ "$VIASH_PAR_SMALL_RNA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--small_rna' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CONSIDER_ALREADY_TRIMMED" ]]; then - if ! [[ "$VIASH_PAR_CONSIDER_ALREADY_TRIMMED" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--consider_already_trimmed' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_LENGTH" ]]; then - if ! [[ "$VIASH_PAR_MAX_LENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_length' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_STRINGENCY" ]]; then - if ! [[ "$VIASH_PAR_STRINGENCY" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--stringency' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_ERROR_RATE" ]]; then - if ! [[ "$VIASH_PAR_ERROR_RATE" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then - ViashError '--error_rate' has to be a double. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GZIP" ]]; then - if ! [[ "$VIASH_PAR_GZIP" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--gzip' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DONT_GZIP" ]]; then - if ! [[ "$VIASH_PAR_DONT_GZIP" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--dont_gzip' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LENGTH" ]]; then - if ! [[ "$VIASH_PAR_LENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--length' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_N" ]]; then - if ! [[ "$VIASH_PAR_MAX_N" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_n' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_TRIM_N" ]]; then - if ! [[ "$VIASH_PAR_TRIM_N" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--trim_n' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NO_REPORT_FILE" ]]; then - if ! [[ "$VIASH_PAR_NO_REPORT_FILE" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--no_report_file' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_SUPPRESS_WARN" ]]; then - if ! [[ "$VIASH_PAR_SUPPRESS_WARN" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--suppress_warn' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CLIP_R1" ]]; then - if ! [[ "$VIASH_PAR_CLIP_R1" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip_R1' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CLIP_R2" ]]; then - if ! [[ "$VIASH_PAR_CLIP_R2" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--clip_R2' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_THREE_PRIME_CLIP_R1" ]]; then - if ! [[ "$VIASH_PAR_THREE_PRIME_CLIP_R1" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--three_prime_clip_R1' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_THREE_PRIME_CLIP_R2" ]]; then - if ! [[ "$VIASH_PAR_THREE_PRIME_CLIP_R2" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--three_prime_clip_R2' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NEXTSEQ" ]]; then - if ! [[ "$VIASH_PAR_NEXTSEQ" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--nextseq' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CORES" ]]; then - if ! [[ "$VIASH_PAR_CORES" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--cores' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_HARDTRIM5" ]]; then - if ! [[ "$VIASH_PAR_HARDTRIM5" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--hardtrim5' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_HARDTRIM3" ]]; then - if ! [[ "$VIASH_PAR_HARDTRIM3" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--hardtrim3' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_CLOCK" ]]; then - if ! [[ "$VIASH_PAR_CLOCK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--clock' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_POLYA" ]]; then - if ! 
[[ "$VIASH_PAR_POLYA" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--polyA' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_IMPLICON" ]]; then - if ! [[ "$VIASH_PAR_IMPLICON" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--implicon' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_RRBS" ]]; then - if ! [[ "$VIASH_PAR_RRBS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--rrbs' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_NON_DIRECTIONAL" ]]; then - if ! [[ "$VIASH_PAR_NON_DIRECTIONAL" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--non_directional' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_KEEP" ]]; then - if ! [[ "$VIASH_PAR_KEEP" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--keep' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_RETAIN_UNPAIRED" ]]; then - if ! [[ "$VIASH_PAR_RETAIN_UNPAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--retain_unpaired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LENGTH_1" ]]; then - if ! [[ "$VIASH_PAR_LENGTH_1" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--length_1' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_LENGTH_2" ]]; then - if ! [[ "$VIASH_PAR_LENGTH_2" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--length_2' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_DIR")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_DIR")" -fi -if [ ! -z "$VIASH_PAR_TRIMMED_R1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_R1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_R1")" -fi -if [ ! -z "$VIASH_PAR_TRIMMED_R2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_R2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_R2")" -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMING_REPORT_R1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMING_REPORT_R1")" -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMING_REPORT_R2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMING_REPORT_R2")" -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_HTML_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_HTML_1")" -fi -if [ ! 
-z "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_HTML_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_HTML_2")" -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1")" -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ] && [ ! -d "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2")" -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R1" ] && [ ! -d "$(dirname "$VIASH_PAR_UNPAIRED_R1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_UNPAIRED_R1")" -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R2" ] && [ ! -d "$(dirname "$VIASH_PAR_UNPAIRED_R2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_UNPAIRED_R2")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_DIR")" ) - VIASH_PAR_OUTPUT_DIR=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_DIR") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_DIR" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMED_R1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_R1")" ) - VIASH_PAR_TRIMMED_R1=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_R1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_R1" ) -fi -if [ ! 
-z "$VIASH_PAR_TRIMMED_R2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_R2")" ) - VIASH_PAR_TRIMMED_R2=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_R2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_R2" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMING_REPORT_R1")" ) - VIASH_PAR_TRIMMING_REPORT_R1=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMING_REPORT_R1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMING_REPORT_R1" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMING_REPORT_R2")" ) - VIASH_PAR_TRIMMING_REPORT_R2=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMING_REPORT_R2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMING_REPORT_R2" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_FASTQC_HTML_1")" ) - VIASH_PAR_TRIMMED_FASTQC_HTML_1=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_FASTQC_HTML_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_FASTQC_HTML_2")" ) - VIASH_PAR_TRIMMED_FASTQC_HTML_2=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_FASTQC_HTML_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ) -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1")" ) - VIASH_PAR_TRIMMED_FASTQC_ZIP_1=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ) -fi -if [ ! 
-z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2")" ) - VIASH_PAR_TRIMMED_FASTQC_ZIP_2=$(ViashDockerAutodetectMount "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ) -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_UNPAIRED_R1")" ) - VIASH_PAR_UNPAIRED_R1=$(ViashDockerAutodetectMount "$VIASH_PAR_UNPAIRED_R1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_UNPAIRED_R1" ) -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_UNPAIRED_R2")" ) - VIASH_PAR_UNPAIRED_R2=$(ViashDockerAutodetectMount "$VIASH_PAR_UNPAIRED_R2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_UNPAIRED_R2" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-trimgalore-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! 
-z ${VIASH_PAR_QUALITY+x} ]; then echo "${VIASH_PAR_QUALITY}" | sed "s#'#'\"'\"'#g;s#.*#par_quality='&'#" ; else echo "# par_quality="; fi ) -$( if [ ! -z ${VIASH_PAR_PHRED33+x} ]; then echo "${VIASH_PAR_PHRED33}" | sed "s#'#'\"'\"'#g;s#.*#par_phred33='&'#" ; else echo "# par_phred33="; fi ) -$( if [ ! -z ${VIASH_PAR_PHRED64+x} ]; then echo "${VIASH_PAR_PHRED64}" | sed "s#'#'\"'\"'#g;s#.*#par_phred64='&'#" ; else echo "# par_phred64="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC+x} ]; then echo "${VIASH_PAR_FASTQC}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc='&'#" ; else echo "# par_fastqc="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_ARGS+x} ]; then echo "${VIASH_PAR_FASTQC_ARGS}" | sed "s#'#'\"'\"'#g;s#.*#par_fastqc_args='&'#" ; else echo "# par_fastqc_args="; fi ) -$( if [ ! -z ${VIASH_PAR_ADAPTER+x} ]; then echo "${VIASH_PAR_ADAPTER}" | sed "s#'#'\"'\"'#g;s#.*#par_adapter='&'#" ; else echo "# par_adapter="; fi ) -$( if [ ! -z ${VIASH_PAR_ADAPTER2+x} ]; then echo "${VIASH_PAR_ADAPTER2}" | sed "s#'#'\"'\"'#g;s#.*#par_adapter2='&'#" ; else echo "# par_adapter2="; fi ) -$( if [ ! -z ${VIASH_PAR_ILLUMINA+x} ]; then echo "${VIASH_PAR_ILLUMINA}" | sed "s#'#'\"'\"'#g;s#.*#par_illumina='&'#" ; else echo "# par_illumina="; fi ) -$( if [ ! -z ${VIASH_PAR_STRANDED_ILLUMINA+x} ]; then echo "${VIASH_PAR_STRANDED_ILLUMINA}" | sed "s#'#'\"'\"'#g;s#.*#par_stranded_illumina='&'#" ; else echo "# par_stranded_illumina="; fi ) -$( if [ ! -z ${VIASH_PAR_NEXTERA+x} ]; then echo "${VIASH_PAR_NEXTERA}" | sed "s#'#'\"'\"'#g;s#.*#par_nextera='&'#" ; else echo "# par_nextera="; fi ) -$( if [ ! -z ${VIASH_PAR_SMALL_RNA+x} ]; then echo "${VIASH_PAR_SMALL_RNA}" | sed "s#'#'\"'\"'#g;s#.*#par_small_rna='&'#" ; else echo "# par_small_rna="; fi ) -$( if [ ! -z ${VIASH_PAR_CONSIDER_ALREADY_TRIMMED+x} ]; then echo "${VIASH_PAR_CONSIDER_ALREADY_TRIMMED}" | sed "s#'#'\"'\"'#g;s#.*#par_consider_already_trimmed='&'#" ; else echo "# par_consider_already_trimmed="; fi ) -$( if [ ! 
-z ${VIASH_PAR_MAX_LENGTH+x} ]; then echo "${VIASH_PAR_MAX_LENGTH}" | sed "s#'#'\"'\"'#g;s#.*#par_max_length='&'#" ; else echo "# par_max_length="; fi ) -$( if [ ! -z ${VIASH_PAR_STRINGENCY+x} ]; then echo "${VIASH_PAR_STRINGENCY}" | sed "s#'#'\"'\"'#g;s#.*#par_stringency='&'#" ; else echo "# par_stringency="; fi ) -$( if [ ! -z ${VIASH_PAR_ERROR_RATE+x} ]; then echo "${VIASH_PAR_ERROR_RATE}" | sed "s#'#'\"'\"'#g;s#.*#par_error_rate='&'#" ; else echo "# par_error_rate="; fi ) -$( if [ ! -z ${VIASH_PAR_GZIP+x} ]; then echo "${VIASH_PAR_GZIP}" | sed "s#'#'\"'\"'#g;s#.*#par_gzip='&'#" ; else echo "# par_gzip="; fi ) -$( if [ ! -z ${VIASH_PAR_DONT_GZIP+x} ]; then echo "${VIASH_PAR_DONT_GZIP}" | sed "s#'#'\"'\"'#g;s#.*#par_dont_gzip='&'#" ; else echo "# par_dont_gzip="; fi ) -$( if [ ! -z ${VIASH_PAR_LENGTH+x} ]; then echo "${VIASH_PAR_LENGTH}" | sed "s#'#'\"'\"'#g;s#.*#par_length='&'#" ; else echo "# par_length="; fi ) -$( if [ ! -z ${VIASH_PAR_MAX_N+x} ]; then echo "${VIASH_PAR_MAX_N}" | sed "s#'#'\"'\"'#g;s#.*#par_max_n='&'#" ; else echo "# par_max_n="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIM_N+x} ]; then echo "${VIASH_PAR_TRIM_N}" | sed "s#'#'\"'\"'#g;s#.*#par_trim_n='&'#" ; else echo "# par_trim_n="; fi ) -$( if [ ! -z ${VIASH_PAR_NO_REPORT_FILE+x} ]; then echo "${VIASH_PAR_NO_REPORT_FILE}" | sed "s#'#'\"'\"'#g;s#.*#par_no_report_file='&'#" ; else echo "# par_no_report_file="; fi ) -$( if [ ! -z ${VIASH_PAR_SUPPRESS_WARN+x} ]; then echo "${VIASH_PAR_SUPPRESS_WARN}" | sed "s#'#'\"'\"'#g;s#.*#par_suppress_warn='&'#" ; else echo "# par_suppress_warn="; fi ) -$( if [ ! -z ${VIASH_PAR_CLIP_R1+x} ]; then echo "${VIASH_PAR_CLIP_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_clip_R1='&'#" ; else echo "# par_clip_R1="; fi ) -$( if [ ! -z ${VIASH_PAR_CLIP_R2+x} ]; then echo "${VIASH_PAR_CLIP_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_clip_R2='&'#" ; else echo "# par_clip_R2="; fi ) -$( if [ ! 
-z ${VIASH_PAR_THREE_PRIME_CLIP_R1+x} ]; then echo "${VIASH_PAR_THREE_PRIME_CLIP_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_three_prime_clip_R1='&'#" ; else echo "# par_three_prime_clip_R1="; fi ) -$( if [ ! -z ${VIASH_PAR_THREE_PRIME_CLIP_R2+x} ]; then echo "${VIASH_PAR_THREE_PRIME_CLIP_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_three_prime_clip_R2='&'#" ; else echo "# par_three_prime_clip_R2="; fi ) -$( if [ ! -z ${VIASH_PAR_NEXTSEQ+x} ]; then echo "${VIASH_PAR_NEXTSEQ}" | sed "s#'#'\"'\"'#g;s#.*#par_nextseq='&'#" ; else echo "# par_nextseq="; fi ) -$( if [ ! -z ${VIASH_PAR_BASENAME+x} ]; then echo "${VIASH_PAR_BASENAME}" | sed "s#'#'\"'\"'#g;s#.*#par_basename='&'#" ; else echo "# par_basename="; fi ) -$( if [ ! -z ${VIASH_PAR_CORES+x} ]; then echo "${VIASH_PAR_CORES}" | sed "s#'#'\"'\"'#g;s#.*#par_cores='&'#" ; else echo "# par_cores="; fi ) -$( if [ ! -z ${VIASH_PAR_HARDTRIM5+x} ]; then echo "${VIASH_PAR_HARDTRIM5}" | sed "s#'#'\"'\"'#g;s#.*#par_hardtrim5='&'#" ; else echo "# par_hardtrim5="; fi ) -$( if [ ! -z ${VIASH_PAR_HARDTRIM3+x} ]; then echo "${VIASH_PAR_HARDTRIM3}" | sed "s#'#'\"'\"'#g;s#.*#par_hardtrim3='&'#" ; else echo "# par_hardtrim3="; fi ) -$( if [ ! -z ${VIASH_PAR_CLOCK+x} ]; then echo "${VIASH_PAR_CLOCK}" | sed "s#'#'\"'\"'#g;s#.*#par_clock='&'#" ; else echo "# par_clock="; fi ) -$( if [ ! -z ${VIASH_PAR_POLYA+x} ]; then echo "${VIASH_PAR_POLYA}" | sed "s#'#'\"'\"'#g;s#.*#par_polyA='&'#" ; else echo "# par_polyA="; fi ) -$( if [ ! -z ${VIASH_PAR_IMPLICON+x} ]; then echo "${VIASH_PAR_IMPLICON}" | sed "s#'#'\"'\"'#g;s#.*#par_implicon='&'#" ; else echo "# par_implicon="; fi ) -$( if [ ! -z ${VIASH_PAR_RRBS+x} ]; then echo "${VIASH_PAR_RRBS}" | sed "s#'#'\"'\"'#g;s#.*#par_rrbs='&'#" ; else echo "# par_rrbs="; fi ) -$( if [ ! -z ${VIASH_PAR_NON_DIRECTIONAL+x} ]; then echo "${VIASH_PAR_NON_DIRECTIONAL}" | sed "s#'#'\"'\"'#g;s#.*#par_non_directional='&'#" ; else echo "# par_non_directional="; fi ) -$( if [ ! 
-z ${VIASH_PAR_KEEP+x} ]; then echo "${VIASH_PAR_KEEP}" | sed "s#'#'\"'\"'#g;s#.*#par_keep='&'#" ; else echo "# par_keep="; fi ) -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_RETAIN_UNPAIRED+x} ]; then echo "${VIASH_PAR_RETAIN_UNPAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_retain_unpaired='&'#" ; else echo "# par_retain_unpaired="; fi ) -$( if [ ! -z ${VIASH_PAR_LENGTH_1+x} ]; then echo "${VIASH_PAR_LENGTH_1}" | sed "s#'#'\"'\"'#g;s#.*#par_length_1='&'#" ; else echo "# par_length_1="; fi ) -$( if [ ! -z ${VIASH_PAR_LENGTH_2+x} ]; then echo "${VIASH_PAR_LENGTH_2}" | sed "s#'#'\"'\"'#g;s#.*#par_length_2='&'#" ; else echo "# par_length_2="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\"'\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMED_R1+x} ]; then echo "${VIASH_PAR_TRIMMED_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_r1='&'#" ; else echo "# par_trimmed_r1="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMED_R2+x} ]; then echo "${VIASH_PAR_TRIMMED_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_r2='&'#" ; else echo "# par_trimmed_r2="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMING_REPORT_R1+x} ]; then echo "${VIASH_PAR_TRIMMING_REPORT_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_trimming_report_r1='&'#" ; else echo "# par_trimming_report_r1="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMING_REPORT_R2+x} ]; then echo "${VIASH_PAR_TRIMMING_REPORT_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_trimming_report_r2='&'#" ; else echo "# par_trimming_report_r2="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_HTML_1+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_HTML_1}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_fastqc_html_1='&'#" ; else echo "# par_trimmed_fastqc_html_1="; fi ) -$( if [ ! 
-z ${VIASH_PAR_TRIMMED_FASTQC_HTML_2+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_HTML_2}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_fastqc_html_2='&'#" ; else echo "# par_trimmed_fastqc_html_2="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_ZIP_1+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_ZIP_1}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_fastqc_zip_1='&'#" ; else echo "# par_trimmed_fastqc_zip_1="; fi ) -$( if [ ! -z ${VIASH_PAR_TRIMMED_FASTQC_ZIP_2+x} ]; then echo "${VIASH_PAR_TRIMMED_FASTQC_ZIP_2}" | sed "s#'#'\"'\"'#g;s#.*#par_trimmed_fastqc_zip_2='&'#" ; else echo "# par_trimmed_fastqc_zip_2="; fi ) -$( if [ ! -z ${VIASH_PAR_UNPAIRED_R1+x} ]; then echo "${VIASH_PAR_UNPAIRED_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_unpaired_r1='&'#" ; else echo "# par_unpaired_r1="; fi ) -$( if [ ! -z ${VIASH_PAR_UNPAIRED_R2+x} ]; then echo "${VIASH_PAR_UNPAIRED_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_unpaired_r2='&'#" ; else echo "# par_unpaired_r2="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -[[ ! -d \$output_dir ]] && mkdir -p \$par_output_dir - -IFS=";" read -ra input <<< \$par_input - -unset_if_false=( - par_phred33 - par_phred64 - par_fastqc - par_illumina - par_stranded_illumina - par_nextera - par_small_rna - par_gzip - par_dont_gzip - par_no_report_file - par_suppress_warn - par_clock - par_polyA - par_rrbs - par_non_directional - par_keep par_paired - par_retain_unpaired -) - -for par in \${unset_if_false[@]}; do - test_val="\${!par}" - [[ "\$test_val" == "false" ]] && unset \$par -done - -trim_galore \\ - \${par_quality:+-q "\${par_quality}"} \\ - \${par_phred33:+--phred33} \\ - \${par_phred64:+--phred64 } \\ - \${par_fastqc:+--fastqc } \\ - \${par_fastqc_args:+--fastqc_args "\${par_fastqc_args}"} \\ - \${par_adapter:+-a "\${par_adapter}"} \\ - \${par_adapter2:+-a2 "\${par_adapter2}"} \\ - \${par_illumina:+--illumina} \\ - \${par_stranded_illumina:+--stranded_illumina} \\ - \${par_nextera:+--nextera} \\ - \${par_small_rna:+--small_rna} \\ - \${par_consider_already_trimmed:+--consider_already_trimmed "\${par_consider_already_trimmed}"} \\ - \${par_max_length:+--max_length "\${par_max_length}"} \\ - \${par_stringency:+--stringency "\${par_stringency}"} \\ - \${par_error_rate:+-e "\${par_error_rate}"} \\ - \${par_gzip:+--gzip} \\ - \${par_dont_gzip:+--dont_gzip} \\ - \${par_length:+--length "\${par_length}"} \\ - \${par_max_n:+--max_n "\${par_max_n}"} \\ - \${par_trim_n:+--trim-n "\${par_trim_n}"} \\ - \${par_no_report_file:+--no_report_file} \\ - \${par_suppress_warn:+--suppress_warn} \\ - \${par_clip_R1:+--clip_R1 "\${par_clip_R1}"} \\ - \${par_clip_R2:+--clip_R2 "\${par_clip_R2}"} \\ - \${par_three_prime_clip_R1:+--three_prime_clip_R1 "\${par_three_prime_clip_R1}"} \\ - \${par_three_prime_clip_R2:+--three_prime_clip_R2 
"\${par_three_prime_clip_R2}"} \\ - \${par_nextseq:+--nextseq "\${par_nextseq}"} \\ - \${par_basename:+-basename "\${par_basename}"} \\ - \${par_hardtrim5:+--hardtrim5 "\${par_hardtrim5}"} \\ - \${par_hardtrim3:+--hardtrim3 "\${par_hardtrim3}"} \\ - \${par_clock:+--clock} \\ - \${par_polyA:+--polyA} \\ - \${par_implicon:+--implicon "\${par_implicon}"} \\ - \${par_rrbs:+--rrbs} \\ - \${par_non_directional:+--non_directional} \\ - \${par_keep:+--keep} \\ - \${par_paired:+--paired} \\ - \${par_retain_unpaired:+--retain_unpaired} \\ - \${par_length_1:+-r1 "\${par_length_1}"} \\ - \${par_length_2:+-r2 "\${par_length_2}"} \\ - \${par_cores:+-j "\${par_cores}"} \\ - -o \$par_output_dir \\ - \${input[*]} - -if [ \$par_paired == "true" ]; then - - input_r1=\$(basename -- "\${input[0]}") - input_r2=\$(basename -- "\${input[1]}") - [[ ! -z "\$par_trimmed_r1" ]] && mv \$par_output_dir/*val_1.f*q* \$par_trimmed_r1 - [[ ! -z "\$par_trimmed_r2" ]] && mv \$par_output_dir/*val_2.f*q* \$par_trimmed_r2 - [[ ! -z "\$par_trimming_report_r1" ]] && mv \$par_output_dir/\${input_r1}_trimming_report.txt \$par_trimming_report_r1 - [[ ! -z "\$par_trimming_report_r2" ]] && mv \$par_output_dir/\${input_r2}_trimming_report.txt \$par_trimming_report_r2 - - if [ "\$par_fastqc" == "true" ]; then - [[ ! -z "\$par_trimmed_fastqc_html_1" ]] && mv \$par_output_dir/*val_1_fastqc.html \$par_trimmed_fastqc_html_1 - [[ ! -z "\$par_trimmed_fastqc_html_2" ]] && mv \$par_output_dir/*val_2_fastqc.html \$par_trimmed_fastqc_html_2 - [[ ! -z "\$par_trimmed_fastqc_zip_1" ]] && mv \$par_output_dir/*val_1_fastqc.zip \$par_trimmed_fastqc_zip_1 - [[ ! -z "\$par_trimmed_fastqc_zip_2" ]] && mv \$par_output_dir/*val_2_fastqc.zip \$par_trimmed_fastqc_zip_2 - fi - - if [ "\$par_retain_unpaired" == "true" ]; then - [[ ! -z "\$par_unpaired_r1" ]] && mv \$par_output_dir/*.unpaired_1.f*q* \$par_unpaired_r1 - [[ ! 
-z "\$par_unpaired_r2" ]] && mv \$par_output_dir/*.unpaired_2.f*q* \$par_unpaired_r2 - fi - -else - - input_r1=\$(basename -- "\${input[0]}") - [[ ! -z "\$par_trimmed_r1" ]] && mv \$par_output_dir/*_trimmed.fq* \$par_trimmed_r1 - [[ ! -z "\$par_trimming_report_r1" ]] && mv \$par_output_dir/\${input_r1}_trimming_report.txt \$par_trimming_report_r1 - - if [ "\$par_fastqc" == "true" ]; then - [[ ! -z "\$par_trimmed_fastqc_html_1" ]] && mv \$par_output_dir/*_trimmed_fastqc.html \$par_trimmed_fastqc_html_1 - [[ ! -z "\$par_trimmed_fastqc_zip_1" ]] && mv \$par_output_dir/*_trimmed_fastqc.zip \$par_trimmed_fastqc_zip_1 - fi - -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=';' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ]; then - VIASH_PAR_OUTPUT_DIR=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_DIR") - fi - if [ ! -z "$VIASH_PAR_TRIMMED_R1" ]; then - VIASH_PAR_TRIMMED_R1=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_R1") - fi - if [ ! -z "$VIASH_PAR_TRIMMED_R2" ]; then - VIASH_PAR_TRIMMED_R2=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_R2") - fi - if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R1" ]; then - VIASH_PAR_TRIMMING_REPORT_R1=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMING_REPORT_R1") - fi - if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R2" ]; then - VIASH_PAR_TRIMMING_REPORT_R2=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMING_REPORT_R2") - fi - if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ]; then - VIASH_PAR_TRIMMED_FASTQC_HTML_1=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_FASTQC_HTML_1") - fi - if [ ! 
-z "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ]; then - VIASH_PAR_TRIMMED_FASTQC_HTML_2=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_FASTQC_HTML_2") - fi - if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ]; then - VIASH_PAR_TRIMMED_FASTQC_ZIP_1=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1") - fi - if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ]; then - VIASH_PAR_TRIMMED_FASTQC_ZIP_2=$(ViashDockerStripAutomount "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2") - fi - if [ ! -z "$VIASH_PAR_UNPAIRED_R1" ]; then - VIASH_PAR_UNPAIRED_R1=$(ViashDockerStripAutomount "$VIASH_PAR_UNPAIRED_R1") - fi - if [ ! -z "$VIASH_PAR_UNPAIRED_R2" ]; then - VIASH_PAR_UNPAIRED_R2=$(ViashDockerStripAutomount "$VIASH_PAR_UNPAIRED_R2") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_DIR" ] && [ ! -e "$VIASH_PAR_OUTPUT_DIR" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_DIR' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_R1" ] && [ ! -e "$VIASH_PAR_TRIMMED_R1" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_R1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_R2" ] && [ ! -e "$VIASH_PAR_TRIMMED_R2" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_R2' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R1" ] && [ ! -e "$VIASH_PAR_TRIMMING_REPORT_R1" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMING_REPORT_R1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMING_REPORT_R2" ] && [ ! 
-e "$VIASH_PAR_TRIMMING_REPORT_R2" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMING_REPORT_R2' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ] && [ ! -e "$VIASH_PAR_TRIMMED_FASTQC_HTML_1" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_FASTQC_HTML_1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ] && [ ! -e "$VIASH_PAR_TRIMMED_FASTQC_HTML_2" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_FASTQC_HTML_2' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ] && [ ! -e "$VIASH_PAR_TRIMMED_FASTQC_ZIP_1" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_FASTQC_ZIP_1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ] && [ ! -e "$VIASH_PAR_TRIMMED_FASTQC_ZIP_2" ]; then - ViashError "Output file '$VIASH_PAR_TRIMMED_FASTQC_ZIP_2' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R1" ] && [ ! -e "$VIASH_PAR_UNPAIRED_R1" ]; then - ViashError "Output file '$VIASH_PAR_UNPAIRED_R1' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_UNPAIRED_R2" ] && [ ! -e "$VIASH_PAR_UNPAIRED_R2" ]; then - ViashError "Output file '$VIASH_PAR_UNPAIRED_R2' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/tx2gene/.config.vsh.yaml b/target/executable/tx2gene/.config.vsh.yaml index c5d4316..8045397 100644 --- a/target/executable/tx2gene/.config.vsh.yaml +++ b/target/executable/tx2gene/.config.vsh.yaml @@ -87,7 +87,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -185,8 +185,8 @@ build_info: output: "target/executable/tx2gene" executable: "target/executable/tx2gene/tx2gene" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -197,7 +197,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/tx2gene/tx2gene b/target/executable/tx2gene/tx2gene index e937a4b..6daaaac 100755 --- a/target/executable/tx2gene/tx2gene +++ b/target/executable/tx2gene/tx2gene @@ -483,9 +483,9 @@ RUN apt-get update && \ RUN pip install --upgrade pip LABEL org.opencontainers.image.description="Companion container for running component tx2gene" -LABEL org.opencontainers.image.created="2024-11-27T08:42:31Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL 
org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/tximport/.config.vsh.yaml b/target/executable/tximport/.config.vsh.yaml index 5d50b00..dbb1495 100644 --- a/target/executable/tximport/.config.vsh.yaml +++ b/target/executable/tximport/.config.vsh.yaml @@ -146,7 +146,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -251,8 +251,8 @@ build_info: output: "target/executable/tximport" executable: "target/executable/tximport/tximport" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -263,7 +263,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/tximport/tximport b/target/executable/tximport/tximport index af60eee..9c3c5bb 100755 --- a/target/executable/tximport/tximport +++ b/target/executable/tximport/tximport @@ -508,9 +508,9 @@ RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packag Rscript -e 'remotes::install_cran(c("jsonlite"), repos = "https://cran.rstudio.com")' LABEL org.opencontainers.image.description="Companion container for running component tximport" -LABEL org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL 
org.opencontainers.image.created="2024-11-27T11:43:51Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/tximport/tximport.r b/target/executable/tximport/tximport.r index c47f8e6..5036399 100755 --- a/target/executable/tximport/tximport.r +++ b/target/executable/tximport/tximport.r @@ -137,5 +137,6 @@ if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) done <- lapply(params, write_se_table) # Output session information and citations -citation("tximeta") +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") sessionInfo() \ No newline at end of file diff --git a/target/executable/ucsc/bedclip/.config.vsh.yaml b/target/executable/ucsc/bedclip/.config.vsh.yaml index 8d3b859..f3a6b86 100644 --- a/target/executable/ucsc/bedclip/.config.vsh.yaml +++ b/target/executable/ucsc/bedclip/.config.vsh.yaml @@ -65,7 +65,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -164,8 +164,8 @@ build_info: output: "target/executable/ucsc/bedclip" executable: "target/executable/ucsc/bedclip/bedclip" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -176,7 +176,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git 
a/target/executable/ucsc/bedclip/bedclip b/target/executable/ucsc/bedclip/bedclip index 654e163..79bb529 100755 --- a/target/executable/ucsc/bedclip/bedclip +++ b/target/executable/ucsc/bedclip/bedclip @@ -473,9 +473,9 @@ RUN apt-get update && \ RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedClip /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component ucsc bedclip" -LABEL org.opencontainers.image.created="2024-11-27T08:42:30Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml index 074a06e..13ad804 100644 --- a/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml +++ b/target/executable/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -65,7 +65,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -164,8 +164,8 @@ build_info: output: "target/executable/ucsc/bedgraphtobigwig" executable: "target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: 
"https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -176,7 +176,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig index f56d9b2..6f22812 100755 --- a/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig +++ b/target/executable/ucsc/bedgraphtobigwig/bedgraphtobigwig @@ -473,9 +473,9 @@ RUN apt-get update && \ RUN rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/ LABEL org.opencontainers.image.description="Companion container for running component ucsc bedgraphtobigwig" -LABEL org.opencontainers.image.created="2024-11-27T08:42:31Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" +LABEL org.opencontainers.image.created="2024-11-27T11:43:50Z" +LABEL org.opencontainers.image.source="https://x-access-token/ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" +LABEL org.opencontainers.image.revision="0c8a7eb648edb0567b7860756b79dfbccbbac27b" LABEL org.opencontainers.image.version="main" VIASHDOCKER diff --git a/target/executable/umitools/umitools_dedup/.config.vsh.yaml b/target/executable/umitools/umitools_dedup/.config.vsh.yaml deleted file mode 100644 index f2dac9d..0000000 --- a/target/executable/umitools/umitools_dedup/.config.vsh.yaml +++ /dev/null @@ -1,225 +0,0 @@ -name: "umitools_dedup" -namespace: "umitools" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam" - description: "Input BAM file" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bai" - description: "BAM index" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--get_output_stats" - description: "Whether or not to generate output stats." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output_bam" - description: "Deduplicated BAM file" - info: null - default: - - "$id.$key.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_stats" - description: "Directory containing UMI based dedupllication statistics files" - info: null - default: - - "$id.umi_dedup.stats" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\ - \ to the read.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "chr19.bam" -- type: "file" - path: "chr19.bam.bai" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/umitools/dedup/main.nf" - - "modules/nf-core/umitools/dedup/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: 
"craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - 
container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/umitools/umitools_dedup/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/umitools/umitools_dedup" - executable: "target/executable/umitools/umitools_dedup/umitools_dedup" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/umitools/umitools_dedup/umitools_dedup b/target/executable/umitools/umitools_dedup/umitools_dedup deleted file mode 100755 index d202d11..0000000 --- a/target/executable/umitools/umitools_dedup/umitools_dedup +++ /dev/null @@ -1,1206 +0,0 @@ -#!/usr/bin/env bash - -# umitools_dedup main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. 
-# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. - -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to 
start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. 
-function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. -function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="umitools_dedup" -VIASH_META_FUNCTIONALITY_NAME="umitools_dedup" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "umitools_dedup main" - echo "" - echo "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the" - echo "read." - echo "" - echo "Input:" - echo " --paired" - echo " type: boolean" - echo " default: false" - echo " Paired fastq files or not?" 
- echo "" - echo " --bam" - echo " type: file, file must exist" - echo " Input BAM file" - echo "" - echo " --bai" - echo " type: file, file must exist" - echo " BAM index" - echo "" - echo " --get_output_stats" - echo " type: boolean" - echo " Whether or not to generate output stats." - echo "" - echo "Output:" - echo " --output_bam" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.bam" - echo " Deduplicated BAM file" - echo "" - echo " --output_stats" - echo " type: file, output, file must exist" - echo " default: \$id.umi_dedup.stats" - echo " Directory containing UMI based dedupllication statistics files" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? 
- else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id 
$(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y pip && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "umi_tools" - -LABEL org.opencontainers.image.description="Companion container for running component umitools 
umitools_dedup" -LABEL org.opencontainers.image.created="2024-11-27T08:42:26Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "umitools_dedup main" - exit - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bam) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam=*) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --bai) - [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAI="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bai. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --bai=*) - [ -n "$VIASH_PAR_BAI" ] && ViashError Bad arguments for option \'--bai=*\': \'$VIASH_PAR_BAI\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAI=$(ViashRemoveFlags "$1") - shift 1 - ;; - --get_output_stats) - [ -n "$VIASH_PAR_GET_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--get_output_stats\': \'$VIASH_PAR_GET_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GET_OUTPUT_STATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --get_output_stats. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --get_output_stats=*) - [ -n "$VIASH_PAR_GET_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--get_output_stats=*\': \'$VIASH_PAR_GET_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_GET_OUTPUT_STATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_bam) - [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_bam=*) - [ -n "$VIASH_PAR_OUTPUT_BAM" ] && ViashError Bad arguments for option \'--output_bam=*\': \'$VIASH_PAR_OUTPUT_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output_stats) - [ -n "$VIASH_PAR_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--output_stats\': \'$VIASH_PAR_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_STATS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_stats. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_stats=*) - [ -n "$VIASH_PAR_OUTPUT_STATS" ] && ViashError Bad arguments for option \'--output_stats=*\': \'$VIASH_PAR_OUTPUT_STATS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_STATS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/umitools/umitools_dedup:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_PAIRED+x} ]; then - VIASH_PAR_PAIRED="false" -fi -if [ -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then - VIASH_PAR_OUTPUT_BAM="\$id.\$key.bam" -fi -if [ -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then - VIASH_PAR_OUTPUT_STATS="\$id.umi_dedup.stats" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then - ViashError "Input file '$VIASH_PAR_BAM' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_BAI" ] && [ ! -e "$VIASH_PAR_BAI" ]; then - ViashError "Input file '$VIASH_PAR_BAI' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_GET_OUTPUT_STATS" ]]; then - if ! [[ "$VIASH_PAR_GET_OUTPUT_STATS" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--get_output_stats' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. 
- exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_BAM")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_BAM")" -fi -if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_STATS")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_STATS")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_BAM" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM")" ) - VIASH_PAR_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM") -fi -if [ ! -z "$VIASH_PAR_BAI" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAI")" ) - VIASH_PAR_BAI=$(ViashDockerAutodetectMount "$VIASH_PAR_BAI") -fi -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_BAM")" ) - VIASH_PAR_OUTPUT_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_BAM") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_BAM" ) -fi -if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_STATS")" ) - VIASH_PAR_OUTPUT_STATS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_STATS") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_STATS" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-umitools_dedup-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\"'\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) -$( if [ ! -z ${VIASH_PAR_GET_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_GET_OUTPUT_STATS}" | sed "s#'#'\"'\"'#g;s#.*#par_get_output_stats='&'#" ; else echo "# par_get_output_stats="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_output_bam='&'#" ; else echo "# par_output_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\"'\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -args="--random-seed=100" - -if \$par_paired; then - paired="--paired" - args+=" --unpaired-reads=discard --chimeric-pairs=discard" -else - paired="" -fi - -if \$par_get_output_stats; then - mkdir -p \$par_output_stats - stats="--output-stats \$par_output_stats/" -else - stats="" -fi - -PYTHONHASHSEED=0 umi_tools dedup -I \$par_bam -S \$par_output_bam \$stats \$paired \$args -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_BAM" ]; then - VIASH_PAR_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_BAM") - fi - if [ ! -z "$VIASH_PAR_BAI" ]; then - VIASH_PAR_BAI=$(ViashDockerStripAutomount "$VIASH_PAR_BAI") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ]; then - VIASH_PAR_OUTPUT_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_BAM") - fi - if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ]; then - VIASH_PAR_OUTPUT_STATS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_STATS") - fi - if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT_BAM" ] && [ ! -e "$VIASH_PAR_OUTPUT_BAM" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_BAM' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_OUTPUT_STATS" ] && [ ! -e "$VIASH_PAR_OUTPUT_STATS" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT_STATS' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/executable/umitools/umitools_extract/.config.vsh.yaml b/target/executable/umitools/umitools_extract/.config.vsh.yaml deleted file mode 100644 index b69d88b..0000000 --- a/target/executable/umitools/umitools_extract/.config.vsh.yaml +++ /dev/null @@ -1,283 +0,0 @@ -name: "umitools_extract" -namespace: "umitools" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - - type: "string" - name: "--bc_pattern" - description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ - \ first 6 nucleotides of the read are from the UMI." 
- info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," -- name: "Output" - arguments: - - type: "file" - name: "--fastq_1" - description: "Output file for read 1." - info: null - default: - - "$id.$key.read_1.fastq" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastq_2" - description: "Output file for read 2." - info: null - default: - - "$id.$key.read_2.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -- name: "Optional arguments" - arguments: - - type: "string" - name: "--umitools_extract_method" - description: "UMI pattern to use." - info: null - default: - - "string" - required: false - choices: - - "string" - - "regex" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--umitools_umi_separator" - description: "The character that separates the UMI in the read name. Most likely\ - \ a colon if you skipped the extraction with UMI-tools and used other software." - info: null - default: - - "_" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--umitools_grouping_method" - description: "Method to use to determine read groups by subsuming those with similar\ - \ UMIs. All methods start by identifying the reads with the same mapping position,\ - \ but treat similar yet nonidentical UMIs differently." - info: null - default: - - "directional" - required: false - choices: - - "unique" - - "percentile" - - "cluster" - - "adjacency" - - "directional" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--umi_discard_read" - description: "After UMI barcode extraction discard either R1 or R2 by setting\ - \ this parameter to 1 or 2, respectively." 
- info: null - default: - - 0 - required: false - choices: - - 0 - - 1 - - 2 - direction: "input" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "UMI-tools contains tools for dealing with Unique Molecular Identifiers\ - \ (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See\ - \ https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component\ - \ flexible removes UMI sequences from fastq reads. UMIs are removed and appended\ - \ to the read name.\nThis component extracts UMI barcode from a read and add it\ - \ to the read name, leaving any sample barcode in place\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "scrb_seq_fastq.1.gz" -- type: "file" - path: "scrb_seq_fastq.2.gz" -- type: "file" - path: "slim.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/umitools/extract/main.nf" - - "modules/nf-core/umitools/extract/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - 
mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/umitools/umitools_extract/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/umitools/umitools_extract" - executable: "target/executable/umitools/umitools_extract/umitools_extract" - viash_version: "0.9.0" - git_commit: 
"ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/umitools/umitools_extract/umitools_extract b/target/executable/umitools/umitools_extract/umitools_extract deleted file mode 100755 index 6049564..0000000 --- a/target/executable/umitools/umitools_extract/umitools_extract +++ /dev/null @@ -1,1388 +0,0 @@ -#!/usr/bin/env bash - -# umitools_extract main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
function ViashWarning {
  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog $VIASH_LOGCODE_INFO info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
}

# find source folder of this component
# (quoted so that a script path containing whitespace is passed as one argument)
VIASH_META_RESOURCES_DIR=$(ViashSourceDir "${BASH_SOURCE[0]}")

# find the root of the built components & dependencies
VIASH_TARGET_DIR=$(ViashFindTargetDir "$VIASH_META_RESOURCES_DIR")

# define meta fields
VIASH_META_NAME="umitools_extract"
VIASH_META_FUNCTIONALITY_NAME="umitools_extract"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"


# ViashHelp: Display helpful explanation about this executable
# NOTE(review): the leading whitespace inside these strings looks collapsed in
# the reviewed copy; confirm the intended indentation against generator output.
function ViashHelp {
  echo "umitools_extract main"
  echo ""
  echo "UMI-tools contains tools for dealing with Unique Molecular Identifiers"
  echo "(UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See"
  echo "https://umi-tools.readthedocs.io/en/latest/ for more information."
  echo "This component flexible removes UMI sequences from fastq reads. UMIs are removed"
  echo "and appended to the read name."
  echo "This component extracts UMI barcode from a read and add it to the read name,"
  echo "leaving any sample barcode in place"
  echo ""
  echo "Input:"
  echo " --paired"
  echo " type: boolean"
  echo " default: false"
  echo " Paired fastq files or not?"
  echo ""
  echo " --input"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: sample.fastq"
  echo " Input fastq files, either one or two (paired)"
  echo ""
  echo " --bc_pattern"
  echo " type: string, multiple values allowed"
  echo " The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6"
  echo " nucleotides of the read are from the UMI."
  echo ""
  echo "Output:"
  echo " --fastq_1"
  echo " type: file, required parameter, output, file must exist"
  echo " default: \$id.\$key.read_1.fastq"
  echo " Output file for read 1."
  echo ""
  echo " --fastq_2"
  echo " type: file, output"
  echo " default: \$id.\$key.read_2.fastq"
  echo " Output file for read 2."
  echo ""
  echo "Optional arguments:"
  echo " --umitools_extract_method"
  echo " type: string"
  echo " default: string"
  echo " choices: [ string, regex ]"
  echo " UMI pattern to use."
  echo ""
  echo " --umitools_umi_separator"
  echo " type: string"
  echo " default: _"
  echo " The character that separates the UMI in the read name. Most likely a"
  echo " colon if you skipped the extraction with UMI-tools and used other"
  echo " software."
  echo ""
  echo " --umitools_grouping_method"
  echo " type: string"
  echo " default: directional"
  echo " choices: [ unique, percentile, cluster, adjacency, directional ]"
  echo " Method to use to determine read groups by subsuming those with similar"
  echo " UMIs. All methods start by identifying the reads with the same mapping"
  echo " position, but treat similar yet nonidentical UMIs differently."
  echo ""
  echo " --umi_discard_read"
  echo " type: integer"
  echo " default: 0"
  echo " choices: [ 0, 1, 2 ]"
  echo " After UMI barcode extraction discard either R1 or R2 by setting this"
  echo " parameter to 1 or 2, respectively."
}

# initialise variables
VIASH_MODE='run'
VIASH_ENGINE_ID='docker'

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits with status 1 when the docker executable cannot be found or when
# `docker version` reports a failure.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # run `command -v` as a command; wrapping it in `[ ... ]` turns it into a
  # malformed test expression that can never detect a missing executable
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  local save=$-; set +e
  # declare first, assign second: `local var=$(cmd)` would make $? the exit
  # status of `local` (always 0) instead of the status of `docker version`
  # NOTE(review): whether a client-only format string still contacts the daemon
  # may depend on the Docker CLI version - confirm.
  local docker_version
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    # verbose: let docker print its own progress, normalise the status to 0/1
    if docker pull "$1"; then
      return 0
    else
      return 1
    fi
  else
    local save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    local out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : exit status of `docker push`
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  local save=$-; set +e
  local out
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    docker push "$1"
    out=$?
  else
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2+                 : additional arguments forwarded to ViashDockerBuild
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  local save=$-; set +e
  ViashDockerPull "$1"
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# $VIASH_ENGINE_ID    : engine whose build arguments are requested
# Exits with status 1 on an unrecognised strategy.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  local image_id="$1"
  local setup_strategy="$2"
  local save
  local outCheck

  # The $(ViashDockerBuildArgs ...) substitutions are deliberately unquoted so
  # that the returned string is split into separate docker build arguments.
  case "$setup_strategy" in
    alwaysbuild|build|b)
      ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayspull|pull|p)
      ViashDockerPull "$image_id"
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    ifneedbe*)
      # only act when the image is not available locally yet
      save=$-; set +e
      ViashDockerLocalTagCheck "$image_id"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $image_id already exists"
      else
        case "$setup_strategy" in
          ifneedbebuild)
            ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbepull)
            ViashDockerPull "$image_id"
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $setup_strategy"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$image_id"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      save=$-; set +e
      ViashDockerRemoteTagCheck "$image_id"
      outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$image_id' exists, doing nothing."
      else
        ViashNotice "Container '$image_id' does not yet exist."
        ViashDockerPush "$image_id"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $setup_strategy"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# Exits with status 1 and names the first missing command.
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  local image_id="$1"
  shift 1
  local commands="$@"
  local save=$-; set +e
  local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0'
  missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  local outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$image_id' does not contain command '$missing'."
    exit 1
  fi
}

# ViashDockerBuild: build a docker image
# $1                               : image identifier with format `[registry/]image[:tag]`
# $...                             : additional arguments to pass to docker build
# $VIASH_META_TEMP_DIR             : temporary directory to store dockerfile & optional resources in
# $VIASH_META_NAME                 : name of the component
# $VIASH_META_RESOURCES_DIR        : directory containing the resources
# $VIASH_VERBOSITY                 : verbosity level
# Exits with status 1 when the temporary directory cannot be created or the
# build fails; returns normally when the image was built.
function ViashDockerBuild {
  local image_id="$1"
  shift 1

  # create temporary directory to store dockerfile & optional resources in
  # (declared separately so that a mktemp failure is not masked by `local`)
  local tmpdir
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") || {
    ViashError "Could not create a temporary directory in '$VIASH_META_TEMP_DIR'"
    exit 1
  }
  local dockerfile="$tmpdir/Dockerfile"
  # covers the `exit 1` below, where $tmpdir is still in scope
  function clean_up {
    rm -rf "$tmpdir"
  }
  trap clean_up EXIT

  # store dockerfile and resources
  ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile"

  # generate the build command
  local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'"

  # build the container
  ViashNotice "Building container '$image_id' with Dockerfile"
  ViashInfo "$docker_build_cmd"
  local save=$-; set +e
  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
    eval "$docker_build_cmd"
  else
    eval "$docker_build_cmd" &> "$tmpdir/docker_build.log"
  fi

  # check exit code
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$image_id'"
    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    exit 1
  fi

  # Remove the build directory now: once this function returns, the local
  # $tmpdir is out of scope and the EXIT trap can no longer find it.
  rm -rf "$tmpdir"
}

######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# $1                  : engine identifier
# return              : dockerfile required to run this component
# examples:
#   ViashDockerfile docker
function ViashDockerfile {
  local engine_id="$1"

  # The image.source label holds the plain repository URL; credentials must
  # never be embedded in image metadata, which anyone pulling the image can read.
  if [[ "$engine_id" == "docker" ]]; then
    cat << 'VIASHDOCKER'
FROM ubuntu:22.04
ENTRYPOINT []
RUN apt-get update && \
  DEBIAN_FRONTEND=noninteractive apt-get install -y pip && \
  rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip && \
  pip install --upgrade --no-cache-dir "umi_tools"

LABEL org.opencontainers.image.description="Companion container for running component umitools umitools_extract"
LABEL org.opencontainers.image.created="2024-11-27T08:42:25Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/rnaseq"
LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
LABEL org.opencontainers.image.version="main"

VIASHDOCKER
  fi
}

# ViashDockerBuildArgs: return the arguments to pass to docker build
# $1                  : engine identifier
# return              : arguments to pass to docker build
function ViashDockerBuildArgs {
  local engine_id="$1"

  if [[ "$engine_id" == "docker" ]]; then
    echo ""
  fi
}

# ViashAbsolutePath: generate absolute path from relative path
# borrowed from https://stackoverflow.com/a/21951256
# $1     : relative filename
# return : absolute path
# examples:
#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
#   ViashAbsolutePath /foo/bar/..     # returns /foo
function ViashAbsolutePath {
  local thePath
  local parr
  local outp
  local len
  if [[ ! "$1" =~ ^/ ]]; then
    thePath="$PWD/$1"
  else
    thePath="$1"
  fi
  # split on '/' inside a subshell so the IFS change does not leak out
  echo "$thePath" | (
    IFS=/
    read -a parr
    declare -a outp
    for i in "${parr[@]}"; do
      case "$i" in
      ''|.) continue ;;
      ..)
        # drop the previous component, if there is one
        len=${#outp[@]}
        if ((len==0)); then
          continue
        else
          unset outp[$((len-1))]
        fi
        ;;
      *)
        len=${#outp[@]}
        outp[$len]="$i"
        ;;
      esac
    done
    echo /"${outp[*]}"
  )
}
# ViashDockerAutodetectMount: translate a host path to its path inside the container
# $1                             : The parameter value
# returns                        : New parameter
# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts
# examples:
#   ViashDockerAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
#   ViashDockerAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
function ViashDockerAutodetectMount {
  local abs_path=$(ViashAbsolutePath "$1")
  local mount_source
  local base_name
  # directories are mounted as-is, files through their parent directory
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=`dirname "$abs_path"`
    base_name=`basename "$abs_path"`
  fi
  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
  if [ -z "$base_name" ]; then
    echo "$mount_target"
  else
    echo "$mount_target/$base_name"
  fi
}
# ViashDockerAutodetectMountArg: print the docker `--volume` argument that
# makes the path in $1 available under $VIASH_DOCKER_AUTOMOUNT_PREFIX
function ViashDockerAutodetectMountArg {
  local abs_path=$(ViashAbsolutePath "$1")
  local mount_source
  local base_name
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=`dirname "$abs_path"`
    base_name=`basename "$abs_path"`
  fi
  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
  ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target"
  echo "--volume=\"$mount_source:$mount_target\""
}
# ViashDockerStripAutomount: remove a leading $VIASH_DOCKER_AUTOMOUNT_PREFIX
# from the absolute form of the path in $1
function ViashDockerStripAutomount {
  local abs_path=$(ViashAbsolutePath "$1")
  echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}"
}
# initialise variables
VIASH_DIRECTORY_MOUNTS=()

# configure default docker automount prefix if it is unset
if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then
  VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount"
fi

# initialise docker variables
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "umitools_extract main" - exit - ;; - --paired) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --paired. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --paired=*) - [ -n "$VIASH_PAR_PAIRED" ] && ViashError Bad arguments for option \'--paired=*\': \'$VIASH_PAR_PAIRED\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_PAIRED=$(ViashRemoveFlags "$1") - shift 1 - ;; - --input) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT="$2" - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - if [ -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - else - VIASH_PAR_INPUT="$VIASH_PAR_INPUT,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --bc_pattern) - if [ -z "$VIASH_PAR_BC_PATTERN" ]; then - VIASH_PAR_BC_PATTERN="$2" - else - VIASH_PAR_BC_PATTERN="$VIASH_PAR_BC_PATTERN,""$2" - fi - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bc_pattern. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --bc_pattern=*) - if [ -z "$VIASH_PAR_BC_PATTERN" ]; then - VIASH_PAR_BC_PATTERN=$(ViashRemoveFlags "$1") - else - VIASH_PAR_BC_PATTERN="$VIASH_PAR_BC_PATTERN,"$(ViashRemoveFlags "$1") - fi - shift 1 - ;; - --fastq_1) - [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_1="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_1. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastq_1=*) - [ -n "$VIASH_PAR_FASTQ_1" ] && ViashError Bad arguments for option \'--fastq_1=*\': \'$VIASH_PAR_FASTQ_1\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_1=$(ViashRemoveFlags "$1") - shift 1 - ;; - --fastq_2) - [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_2="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --fastq_2. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --fastq_2=*) - [ -n "$VIASH_PAR_FASTQ_2" ] && ViashError Bad arguments for option \'--fastq_2=*\': \'$VIASH_PAR_FASTQ_2\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_FASTQ_2=$(ViashRemoveFlags "$1") - shift 1 - ;; - --umitools_extract_method) - [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMITOOLS_EXTRACT_METHOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_extract_method. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --umitools_extract_method=*) - [ -n "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ] && ViashError Bad arguments for option \'--umitools_extract_method=*\': \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMITOOLS_EXTRACT_METHOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --umitools_umi_separator) - [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMITOOLS_UMI_SEPARATOR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_umi_separator. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --umitools_umi_separator=*) - [ -n "$VIASH_PAR_UMITOOLS_UMI_SEPARATOR" ] && ViashError Bad arguments for option \'--umitools_umi_separator=*\': \'$VIASH_PAR_UMITOOLS_UMI_SEPARATOR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMITOOLS_UMI_SEPARATOR=$(ViashRemoveFlags "$1") - shift 1 - ;; - --umitools_grouping_method) - [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMITOOLS_GROUPING_METHOD="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --umitools_grouping_method. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --umitools_grouping_method=*) - [ -n "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ] && ViashError Bad arguments for option \'--umitools_grouping_method=*\': \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_UMITOOLS_GROUPING_METHOD=$(ViashRemoveFlags "$1") - shift 1 - ;; - --umi_discard_read) - [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMI_DISCARD_READ="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --umi_discard_read. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --umi_discard_read=*) - [ -n "$VIASH_PAR_UMI_DISCARD_READ" ] && ViashError Bad arguments for option \'--umi_discard_read=*\': \'$VIASH_PAR_UMI_DISCARD_READ\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_UMI_DISCARD_READ=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/umitools/umitools_extract:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - 
tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_FASTQ_1+x} ]; then - ViashError '--fastq_1' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_PAIRED+x} ]; then - VIASH_PAR_PAIRED="false" -fi -if [ -z ${VIASH_PAR_FASTQ_2+x} ]; then - VIASH_PAR_FASTQ_2="\$id.\$key.read_2.fastq" -fi -if [ -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then - VIASH_PAR_UMITOOLS_EXTRACT_METHOD="string" -fi -if [ -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then - VIASH_PAR_UMITOOLS_UMI_SEPARATOR="_" -fi -if [ -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then - VIASH_PAR_UMITOOLS_GROUPING_METHOD="directional" -fi -if [ -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then - VIASH_PAR_UMI_DISCARD_READ="0" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ]; then - IFS=',' - set -f - for file in $VIASH_PAR_INPUT; do - unset IFS - if [ ! -e "$file" ]; then - ViashError "Input file '$file' does not exist." - exit 1 - fi - done - set +f -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_PAIRED" ]]; then - if ! [[ "$VIASH_PAR_PAIRED" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--paired' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_UMI_DISCARD_READ" ]]; then - if ! [[ "$VIASH_PAR_UMI_DISCARD_READ" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--umi_discard_read' has to be an integer. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_UMITOOLS_EXTRACT_METHOD" ]; then - VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES=("string;regex") - IFS=';' - set -f - if ! [[ ";${VIASH_PAR_UMITOOLS_EXTRACT_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_EXTRACT_METHOD;" ]]; then - ViashError '--umitools_extract_method' specified value of \'$VIASH_PAR_UMITOOLS_EXTRACT_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_UMITOOLS_GROUPING_METHOD" ]; then - VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES=("unique;percentile;cluster;adjacency;directional") - IFS=';' - set -f - if ! [[ ";${VIASH_PAR_UMITOOLS_GROUPING_METHOD_CHOICES[*]};" =~ ";$VIASH_PAR_UMITOOLS_GROUPING_METHOD;" ]]; then - ViashError '--umitools_grouping_method' specified value of \'$VIASH_PAR_UMITOOLS_GROUPING_METHOD\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -if [ ! -z "$VIASH_PAR_UMI_DISCARD_READ" ]; then - VIASH_PAR_UMI_DISCARD_READ_CHOICES=("0;1;2") - IFS=';' - set -f - if ! 
[[ ";${VIASH_PAR_UMI_DISCARD_READ_CHOICES[*]};" =~ ";$VIASH_PAR_UMI_DISCARD_READ;" ]]; then - ViashError '--umi_discard_read' specified value of \'$VIASH_PAR_UMI_DISCARD_READ\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_1")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQ_1")" -fi -if [ ! -z "$VIASH_PAR_FASTQ_2" ] && [ ! -d "$(dirname "$VIASH_PAR_FASTQ_2")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_FASTQ_2")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_TEST_INPUT=() - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" ) - var=$(ViashDockerAutodetectMount "$var") - VIASH_TEST_INPUT+=( "$var" ) - done - VIASH_PAR_INPUT=$(IFS=',' ; echo "${VIASH_TEST_INPUT[*]}") -fi -if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_1")" ) - VIASH_PAR_FASTQ_1=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_1") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_1" ) -fi -if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_FASTQ_2")" ) - VIASH_PAR_FASTQ_2=$(ViashDockerAutodetectMount "$VIASH_PAR_FASTQ_2") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_FASTQ_2" ) -fi -if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-umitools_extract-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\"'\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_BC_PATTERN+x} ]; then echo "${VIASH_PAR_BC_PATTERN}" | sed "s#'#'\"'\"'#g;s#.*#par_bc_pattern='&'#" ; else echo "# par_bc_pattern="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\"'\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi ) -$( if [ ! -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_EXTRACT_METHOD}" | sed "s#'#'\"'\"'#g;s#.*#par_umitools_extract_method='&'#" ; else echo "# par_umitools_extract_method="; fi ) -$( if [ ! -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMITOOLS_UMI_SEPARATOR}" | sed "s#'#'\"'\"'#g;s#.*#par_umitools_umi_separator='&'#" ; else echo "# par_umitools_umi_separator="; fi ) -$( if [ ! 
-z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_GROUPING_METHOD}" | sed "s#'#'\"'\"'#g;s#.*#par_umitools_grouping_method='&'#" ; else echo "# par_umitools_grouping_method="; fi ) -$( if [ ! -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then echo "${VIASH_PAR_UMI_DISCARD_READ}" | sed "s#'#'\"'\"'#g;s#.*#par_umi_discard_read='&'#" ; else echo "# par_umi_discard_read="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "\$tmpdir" -} -trap clean_up EXIT - -tmpdir=\$(mktemp -d "\$meta_temp_dir/\$meta_functionality_name-XXXXXXXX") - -IFS="," read -ra input <<< "\$par_input" -IFS="," read -ra pattern <<< "\$par_bc_pattern" - -read_count="\${#input[@]}" -pattern_count="\${#pattern[@]}" - -if [ "\$par_paired" == "true" ]; then - echo "Paired - Reads: \$read_count bc_patterns: \$pattern_count" - if [ "\$read_count" -ne 2 ] || [ "\$pattern_count" -ne 2 ]; then - echo "Paired end input requires two read files and two UMI patterns" - exit 1 - else - read1="\$(basename -- \${input[0]})" - read2="\$(basename -- \${input[1]})" - umi_tools extract \\ - -I "\${input[0]}" --read2-in="\${input[1]}" \\ - -S "\$tmpdir/\$read1" \\ - --read2-out="\$tmpdir/\$read2" \\ - --extract-method \$par_umitools_extract_method \\ - --bc-pattern "\${pattern[0]}" \\ - --bc-pattern2 "\${pattern[1]}" \\ - --umi-separator \$par_umitools_umi_separator - if [ \$par_umi_discard_read == 1 ]; then - # discard read 1 - cp \$tmpdir/\$read1 \$par_fastq_1 - elif [ \$par_umi_discard_read == 2 ]; then - # discard read 2 - cp \$tmpdir/\$read2 \$par_fastq_1 - else - cp \$tmpdir/\$read1 \$par_fastq_1 - cp \$tmpdir/\$read2 \$par_fastq_2 - fi - fi -else - echo "Not Paired - \$read_count" - if [ "\$read_count" -ne 1 ] || [ "\$pattern_count" -ne 1 ]; then - echo "Single end input requires one read file and one UMI pattern" - exit 1 - else - read1="\$(basename -- \${input[0]})" - umi_tools extract \\ - -I "\${input[0]}" -S "\$tmpdir/\$read1" \\ - --extract-method \$par_umitools_extract_method \\ - --bc-pattern "\${pattern[0]}" \\ - --umi-separator \$par_umitools_umi_separator - cp \$tmpdir/\$read1 \$par_fastq_1 - fi -fi -VIASHMAIN -bash "\$tempscript" & -wait "\$!" 
- -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - unset VIASH_TEST_INPUT - IFS=',' - for var in $VIASH_PAR_INPUT; do - unset IFS - if [ -z "$VIASH_TEST_INPUT" ]; then - VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")" - else - VIASH_TEST_INPUT="$VIASH_TEST_INPUT,""$(ViashDockerStripAutomount "$var")" - fi - done - VIASH_PAR_INPUT="$VIASH_TEST_INPUT" - fi - if [ ! -z "$VIASH_PAR_FASTQ_1" ]; then - VIASH_PAR_FASTQ_1=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_1") - fi - if [ ! -z "$VIASH_PAR_FASTQ_2" ]; then - VIASH_PAR_FASTQ_2=$(ViashDockerStripAutomount "$VIASH_PAR_FASTQ_2") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_FASTQ_1" ] && [ ! -e "$VIASH_PAR_FASTQ_1" ]; then - ViashError "Output file '$VIASH_PAR_FASTQ_1' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/target/executable/umitools_prepareforquant/.config.vsh.yaml b/target/executable/umitools_prepareforquant/.config.vsh.yaml deleted file mode 100644 index d09e804..0000000 --- a/target/executable/umitools_prepareforquant/.config.vsh.yaml +++ /dev/null @@ -1,186 +0,0 @@ -name: "umitools_prepareforquant" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--bam" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output" - info: null - default: - - "$id.transcriptome_sorted.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--log" - info: null - default: - - "$id.$key.log" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -- type: "file" - path: "prepare-for-rsem.py" -description: "Fix paired-end reads in name sorted BAM file to prepare for salmon quantification" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/local/umitools_prepareforrsem.nf" - last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 
5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - - "pysam" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - 
config: "src/umitools_prepareforquant/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "target/executable/umitools_prepareforquant" - executable: "target/executable/umitools_prepareforquant/umitools_prepareforquant" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/executable/umitools_prepareforquant/prepare-for-rsem.py b/target/executable/umitools_prepareforquant/prepare-for-rsem.py deleted file mode 100755 index 59dd01a..0000000 --- a/target/executable/umitools_prepareforquant/prepare-for-rsem.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 - -""" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Credits -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This script is a clone of the "prepare-for-rsem.py" script written by -Ian Sudbury, Tom Smith and other contributors to the UMI-tools package: -https://github.com/CGATOxford/UMI-tools - -It has been included here to address problems encountered with -Salmon quant and RSEM as discussed in the issue below: -https://github.com/CGATOxford/UMI-tools/issues/465 - -When the "umi_tools prepare-for-rsem" command becomes 
available in an official -UMI-tools release this script will be replaced and deprecated. - -Commit: -https://github.com/CGATOxford/UMI-tools/blob/bf8608d6a172c5ca0dcf33c126b4e23429177a72/umi_tools/prepare-for-rsem.py - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -prepare_for_rsem - make the output from dedup or group compatible with RSEM -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The SAM format specification states that the mnext and mpos fields should point -to the primary alignment of a read's mate. However, not all aligners adhere to -this standard. In addition, the RSEM software requires that the mate of a read1 -appears directly after it in its input BAM. This requires that there is exactly -one read1 alignment for every read2 and vice versa. - -In general (except in a few edge cases) UMI tools outputs only the read2 to that -corresponds to the read specified in the mnext and mpos positions of a selected -read1, and only outputs this read once, even if multiple read1s point to it. -This makes UMI-tools outputs incompatible with RSEM. This script takes the output -from dedup or groups and ensures that each read1 has exactly one read2 (and vice -versa), that read2 always appears directly after read1,and that pairs point to -each other (note this is technically not valid SAM format). Copy any specified -tags from read1 to read2 if they are present (by default, UG and BX, the unique -group and correct UMI tags added by _group_) - -Input must to name sorted. - - -https://raw.githubusercontent.com/CGATOxford/UMI-tools/master/LICENSE - -""" - -from umi_tools import Utilities as U -from collections import defaultdict, Counter -import pysam -import sys - -usage = """ -prepare_for_rsem - make output from dedup or group compatible with RSEM - -Usage: umi_tools prepare_for_rsem [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM] - - note: If --stdout is omited, standard out is output. 
To - generate a valid BAM file on standard out, please - redirect log with --log=LOGFILE or --log2stderr """ - - -def chunk_bam(bamfile): - """Take in a iterator of pysam.AlignmentSegment entries and yield - lists of reads that all share the same name""" - - last_query_name = None - output_buffer = list() - - for read in bamfile: - if last_query_name is not None and last_query_name != read.query_name: - yield (output_buffer) - output_buffer = list() - - last_query_name = read.query_name - output_buffer.append(read) - - yield (output_buffer) - - -def copy_tags(tags, read1, read2): - """Given a list of tags, copies the values of these tags from read1 - to read2, if the tag is set""" - - for tag in tags: - try: - read1_tag = read1.get_tag(tag, with_value_type=True) - read2.set_tag(tag, value=read1_tag[0], value_type=read1_tag[1]) - except KeyError: - pass - - return read2 - - -def pick_mate(read, template_dict, mate_key): - """Find the mate of read in the template dict using key. It will retrieve - all reads at that key, and then scan to pick the one that refers to _read_ - as it's mate. If there is no such read, it picks a first one it comes to""" - - mate = None - - # get a list of secondary reads at the correct alignment position - potential_mates = template_dict[not read.is_read1][mate_key] - - # search through one at a time to find a read that points to the current read - # as its mate. - for candidate_mate in potential_mates: - if ( - candidate_mate.next_reference_name == read.reference_name - and candidate_mate.next_reference_start == read.pos - ): - mate = candidate_mate - - # if no such read is found, then pick any old secondary alignment at that position - # note: this happens when UMI-tools outputs the wrong read as something's pair. 
- if mate is None and len(potential_mates) > 0: - mate = potential_mates[0] - - return mate - - -def main(argv=None): - if argv is None: - argv = sys.argv - - # setup command line parser - parser = U.OptionParser(version="%prog version: $Id$", usage=usage, description=globals()["__doc__"]) - group = U.OptionGroup(parser, "RSEM preparation specific options") - - group.add_option( - "--tags", - dest="tags", - type="string", - default="UG,BX", - help="Comma-separated list of tags to transfer from read1 to read2", - ) - group.add_option( - "--sam", dest="sam", action="store_true", default=False, help="input and output SAM rather than BAM" - ) - - parser.add_option_group(group) - - # add common options (-h/--help, ...) and parse command line - (options, args) = U.Start( - parser, argv=argv, add_group_dedup_options=False, add_umi_grouping_options=False, add_sam_options=False - ) - - skipped_stats = Counter() - - if options.stdin != sys.stdin: - in_name = options.stdin.name - options.stdin.close() - else: - in_name = "-" - - if options.sam: - mode = "" - else: - mode = "b" - - inbam = pysam.AlignmentFile(in_name, "r" + mode) - - if options.stdout != sys.stdout: - out_name = options.stdout.name - options.stdout.close() - else: - out_name = "-" - - outbam = pysam.AlignmentFile(out_name, "w" + mode, template=inbam) - - options.tags = options.tags.split(",") - - for template in chunk_bam(inbam): - assert len(set(r.query_name for r in template)) == 1 - current_template = {True: defaultdict(list), False: defaultdict(list)} - - for read in template: - key = (read.reference_name, read.pos, not read.is_secondary) - current_template[read.is_read1][key].append(read) - - output = set() - - for read in template: - mate = None - - # if this read is a non_primary alignment, we first want to check if it has a mate - # with the non-primary alignment flag set. 
- - mate_key_primary = True - mate_key_secondary = (read.next_reference_name, read.next_reference_start, False) - - # First look for a read that has the same primary/secondary status - # as read (i.e. secondary mate for secondary read, and primary mate - # for primary read) - mate_key = (read.next_reference_name, read.next_reference_start, read.is_secondary) - mate = pick_mate(read, current_template, mate_key) - - # If none was found then look for the opposite (primary mate of secondary - # read or seconadary mate of primary read) - if mate is None: - mate_key = (read.next_reference_name, read.next_reference_start, not read.is_secondary) - mate = pick_mate(read, current_template, mate_key) - - # If we still don't have a mate, then their can't be one? - if mate is None: - skipped_stats["no_mate"] += 1 - U.warn( - "Alignment {} has no mate -- skipped".format( - "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) - ) - ) - continue - - # because we might want to make changes to the read, but not have those changes reflected - # if we need the read again,we copy the read. This is only way I can find to do this. - read = pysam.AlignedSegment().from_dict(read.to_dict(), read.header) - mate = pysam.AlignedSegment().from_dict(mate.to_dict(), read.header) - - # Make it so that if our read is secondary, the mate is also secondary. We don't make the - # mate primary if the read is primary because we would otherwise end up with mulitple - # primary alignments. - if read.is_secondary: - mate.is_secondary = True - - # In a situation where there is already one mate for each read, then we will come across - # each pair twice - once when we scan read1 and once when we scan read2. Thus we need - # to make sure we don't output something already output. 
- if read.is_read1: - mate = copy_tags(options.tags, read, mate) - output_key = str(read) + str(mate) - - if output_key not in output: - output.add(output_key) - outbam.write(read) - outbam.write(mate) - skipped_stats["pairs_output"] += 1 - - elif read.is_read2: - read = copy_tags(options.tags, mate, read) - output_key = str(mate) + str(read) - - if output_key not in output: - output.add(output_key) - outbam.write(mate) - outbam.write(read) - skipped_stats["pairs_output"] += 1 - - else: - skipped_stats["skipped_not_read_12"] += 1 - U.warn( - "Alignment {} is neither read1 nor read2 -- skipped".format( - "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) - ) - ) - continue - - if not out_name == "-": - outbam.close() - - U.info( - "Total pairs output: {}, Pairs skipped - no mates: {}," - " Pairs skipped - not read1 or 2: {}".format( - skipped_stats["pairs_output"], skipped_stats["no_mate"], skipped_stats["skipped_not_read12"] - ) - ) - U.Stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/target/executable/umitools_prepareforquant/umitools_prepareforquant b/target/executable/umitools_prepareforquant/umitools_prepareforquant deleted file mode 100755 index ead7947..0000000 --- a/target/executable/umitools_prepareforquant/umitools_prepareforquant +++ /dev/null @@ -1,1115 +0,0 @@ -#!/usr/bin/env bash - -# umitools_prepareforquant main -# -# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="umitools_prepareforquant" -VIASH_META_FUNCTIONALITY_NAME="umitools_prepareforquant" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "umitools_prepareforquant main" - echo "" - echo "Fix paired-end reads in name sorted BAM file to prepare for salmon" - echo "quantification" - echo "" - echo "Input:" - echo " --bam" - echo " type: file, file must exist" - echo "" - echo "Output:" - echo " --output" - echo " type: file, output, file must exist" - echo " default: \$id.transcriptome_sorted.bam" - echo "" - echo " --log" - echo " type: file, output, file must exist" - echo " default: \$id.\$key.log" -} - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check 
whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? 
: whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." - fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM ubuntu:22.04 -ENTRYPOINT [] -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y pip && \ - rm -rf /var/lib/apt/lists/* - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "umi_tools" "pysam" - -LABEL org.opencontainers.image.description="Companion container for running component 
umitools_prepareforquant" -LABEL org.opencontainers.image.created="2024-11-27T08:42:33Z" -LABEL org.opencontainers.image.source="https://x-access-token/ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -LABEL org.opencontainers.image.revision="ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" -LABEL org.opencontainers.image.version="main" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! "$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) 
- len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables 
-VIASH_DOCKER_RUN_ARGS=(-i --rm) - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "umitools_prepareforquant main" - exit - ;; - --bam) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bam. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bam=*) - [ -n "$VIASH_PAR_BAM" ] && ViashError Bad arguments for option \'--bam=*\': \'$VIASH_PAR_BAM\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BAM=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --log) - [ -n "$VIASH_PAR_LOG" ] && ViashError Bad arguments for option \'--log\': \'$VIASH_PAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --log. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --log=*) - [ -n "$VIASH_PAR_LOG" ] && ViashError Bad arguments for option \'--log=*\': \'$VIASH_PAR_LOG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_LOG=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
- exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/rnaseq/umitools_prepareforquant:main' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash' -fi - -# setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) 
memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. 
- exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - VIASH_PAR_OUTPUT="\$id.transcriptome_sorted.bam" -fi -if [ -z ${VIASH_PAR_LOG+x} ]; then - VIASH_PAR_LOG="\$id.\$key.log" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_BAM" ] && [ ! -e "$VIASH_PAR_BAM" ]; then - ViashError "Input file '$VIASH_PAR_BAM' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi -if [ ! -z "$VIASH_PAR_LOG" ] && [ ! -d "$(dirname "$VIASH_PAR_LOG")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_LOG")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." 
- exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_BAM" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_BAM")" ) - VIASH_PAR_BAM=$(ViashDockerAutodetectMount "$VIASH_PAR_BAM") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! -z "$VIASH_PAR_LOG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_LOG")" ) - VIASH_PAR_LOG=$(ViashDockerAutodetectMount "$VIASH_PAR_LOG") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_LOG" ) -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! -z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-umitools_prepareforquant-XXXXXX").sh -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\"'\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\"'\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\"'\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\"'\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\"'\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\"'\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\"'\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -python3 "\$meta_resources_dir/prepare-for-rsem.py" \\ - --stdin=\$par_bam \\ - --stdout=\$par_output \\ - --log=\$par_log -VIASHMAIN -bash "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_BAM" ]; then - VIASH_PAR_BAM=$(ViashDockerStripAutomount "$VIASH_PAR_BAM") - fi - if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") - fi - if [ ! -z "$VIASH_PAR_LOG" ]; then - VIASH_PAR_LOG=$(ViashDockerStripAutomount "$VIASH_PAR_LOG") - fi - if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_LOG" ] && [ ! -e "$VIASH_PAR_LOG" ]; then - ViashError "Output file '$VIASH_PAR_LOG' does not exist." - exit 1 -fi - - -exit 0 diff --git a/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml index 5d5f7d4..cde74af 100644 --- a/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml +++ b/target/executable/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -459,51 +459,57 @@ dependencies: - name: "star/star_align_reads" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_sort" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_stats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_flagstat" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_idxstats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "umitools/umitools_dedup" +- name: "umi_tools/umi_tools_dedup" repository: - type: "local" -- name: 
"umitools_prepareforquant" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "umi_tools/umi_tools_prepareforrsem" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "rsem/rsem_calculate_expression" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -584,8 +590,8 @@ build_info: output: "target/executable/workflows/genome_alignment_and_quant" executable: "target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -593,10 +599,10 @@ build_info: - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats" - - "target/nextflow/umitools/umitools_dedup" - - "target/nextflow/umitools_prepareforquant" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem" - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - - "target/nextflow/rsem/rsem_calculate_expression" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression" 
package_config: name: "rnaseq" version: "main" @@ -607,7 +613,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant index 37938e1..b7055be 100755 --- a/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant +++ b/target/executable/workflows/genome_alignment_and_quant/genome_alignment_and_quant @@ -1285,16 +1285,16 @@ fi # set dependency paths -VIASH_DEP_UMITOOLS_UMITOOLS_DEDUP="$VIASH_META_RESOURCES_DIR/../../../nextflow/umitools/umitools_dedup/main.nf" -VIASH_DEP_UMITOOLS_PREPAREFORQUANT="$VIASH_META_RESOURCES_DIR/../../../nextflow/umitools_prepareforquant/main.nf" -VIASH_DEP_RSEM_RSEM_CALCULATE_EXPRESSION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rsem/rsem_calculate_expression/main.nf" VIASH_DEP_STAR_STAR_ALIGN_READS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_SORT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_index/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_STATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_FLAGSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_IDXSTATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf" +VIASH_DEP_UMI_TOOLS_UMI_TOOLS_DEDUP="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf" 
+VIASH_DEP_UMI_TOOLS_UMI_TOOLS_PREPAREFORRSEM="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" +VIASH_DEP_RSEM_RSEM_CALCULATE_EXPRESSION="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf" ViashDebug "Running command: $(echo $VIASH_CMD)" cat << VIASHEOF | eval $VIASH_CMD @@ -1400,167 +1400,169 @@ workflow run_wf { key: "genome_idxstats" ) - // - // Remove duplicate reads from BAM file based on UMIs - // - - // Deduplicate genome BAM file - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "get_output_stats": "umi_dedup_stats" - ], - toState: [ "genome_bam_sorted": "output_bam" ], - key: "genome_deduped" - ) - | samtools_index.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_deduped" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_deduped_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": 
"fasta", - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_deduped_idxstats" - ) + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped" + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats" + ) // Deduplicate transcriptome BAM file - | samtools_sort.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ "input": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> 
state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "csi": "bam_csi_index" - ], - toState: [ "transcriptome_bam_index": "output" ], - key: "transcriptome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index", - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_idxstats" - ) - - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index", - "get_output_stats": "umi_dedup_stats", - ], - toState: [ "transcriptome_bam_deduped": "output_bam" ], - key: "transcriptome_deduped" - ) - | samtools_sort.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ "input": "transcriptome_bam_deduped" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_deduped_sorted" - ) - | samtools_index.run ( + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted" + ) + | samtools_index.run ( runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, fromState: [ 
"input": "transcriptome_bam", "csi": "bam_csi_index" ], toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats" + ) + + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? 
state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped" + ) + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], key: "transcriptome_deduped_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_deduped_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_deduped_idxstats" - ) + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + 
fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats" + ) - // Fix paired-end reads in name sorted BAM file - | umitools_prepareforquant.run ( - runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, - fromState: [ "bam": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ] - ) + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run ( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ] + ) // Infer lib-type for salmon quant | map { id, state -> @@ -1597,78 +1599,91 @@ workflow run_wf { ] ) - | map { id, state -> - def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state - [ id, mod_state ] - } - - | rsem_calculate_expression.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "id": "id", - "strandedness": "strandedness", - "paired": "paired", - "input": "input", - "index": "rsem_index", - "extra_args": "extra_rsem_calculate_expression_args" - ], - toState: [ - "rsem_counts_gene": "counts_gene", - "rsem_counts_transcripts": "counts_transcripts", - "rsem_multiqc": "stat", - "star_multiqc": "logs", - "bam_star_rsem": "bam_star", - "bam_genome_rsem": "bam_genome", - "bam_transcript_rsem": "bam_transcript" - ] - ) - - // RSEM_Star BAM - | samtools_sort.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: ["input": "bam_star_rsem"], - toState: ["genome_bam_sorted": "output"], - key: "genome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_idxstats" - ) + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ] + ) + + // RSEM_Star BAM + | samtools_sort.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted" + ) + | samtools_index.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: 
[ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats" + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/executable/workflows/merge_quant_results/.config.vsh.yaml b/target/executable/workflows/merge_quant_results/.config.vsh.yaml index ddbf642..74ed758 100644 --- a/target/executable/workflows/merge_quant_results/.config.vsh.yaml +++ b/target/executable/workflows/merge_quant_results/.config.vsh.yaml @@ -197,7 +197,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -278,8 +278,8 @@ build_info: output: "target/executable/workflows/merge_quant_results" executable: "target/executable/workflows/merge_quant_results/merge_quant_results" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/tx2gene" - "target/nextflow/tximport" @@ -294,7 +294,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/post_processing/.config.vsh.yaml b/target/executable/workflows/post_processing/.config.vsh.yaml index b48453f..4fbcc02 100644 --- a/target/executable/workflows/post_processing/.config.vsh.yaml +++ b/target/executable/workflows/post_processing/.config.vsh.yaml @@ -124,17 +124,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_bedtools_args" - description: "Extra arguments to pass 
to bedtools genomecov command in addition\ - \ to defaults defined by the pipeline." - info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "boolean" name: "--bam_csi_index" description: "Create a CSI index for BAM files instead of the traditional BAI\ @@ -368,34 +357,36 @@ dependencies: - name: "samtools/samtools_sort" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_stats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_flagstat" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_idxstats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "stringtie" repository: type: "local" -- name: "bedtools_genomecov" +- name: "bedtools/bedtools_genomecov" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "ucsc/bedclip" repository: type: "local" @@ -405,7 +396,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -486,8 +477,8 @@ build_info: output: "target/executable/workflows/post_processing" executable: "target/executable/workflows/post_processing/post_processing" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/picard_markduplicates" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -496,7 +487,7 @@ build_info: - 
"target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats" - "target/nextflow/stringtie" - - "target/nextflow/bedtools_genomecov" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov" - "target/nextflow/ucsc/bedclip" - "target/nextflow/ucsc/bedgraphtobigwig" package_config: @@ -509,7 +500,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/post_processing/post_processing b/target/executable/workflows/post_processing/post_processing index 7a67893..8c32850 100755 --- a/target/executable/workflows/post_processing/post_processing +++ b/target/executable/workflows/post_processing/post_processing @@ -232,12 +232,6 @@ function ViashHelp { echo " Perform reference-guided de novo assembly of transcripts using" echo " StringTie, i.e. don't restrict to those in GTF file." echo "" - echo " --extra_bedtools_args" - echo " type: string" - echo " default:" - echo " Extra arguments to pass to bedtools genomecov command in addition to" - echo " defaults defined by the pipeline." - echo "" echo " --bam_csi_index" echo " type: boolean" echo " default: false" @@ -485,17 +479,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_STRINGTIE_IGNORE_GTF=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_bedtools_args) - [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_BEDTOOLS_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_bedtools_args. Use "--help" to get more information on the parameters. 
&& exit 1 - shift 2 - ;; - --extra_bedtools_args=*) - [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args=*\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_BEDTOOLS_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --bam_csi_index) [ -n "$VIASH_PAR_BAM_CSI_INDEX" ] && ViashError Bad arguments for option \'--bam_csi_index\': \'$VIASH_PAR_BAM_CSI_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_BAM_CSI_INDEX="$2" @@ -877,9 +860,6 @@ fi if [ -z ${VIASH_PAR_EXTRA_STRINGTIE_ARGS+x} ]; then VIASH_PAR_EXTRA_STRINGTIE_ARGS="" fi -if [ -z ${VIASH_PAR_EXTRA_BEDTOOLS_ARGS+x} ]; then - VIASH_PAR_EXTRA_BEDTOOLS_ARGS="" -fi if [ -z ${VIASH_PAR_BAM_CSI_INDEX+x} ]; then VIASH_PAR_BAM_CSI_INDEX="false" fi @@ -1140,7 +1120,6 @@ fi # set dependency paths VIASH_DEP_PICARD_MARKDUPLICATES="$VIASH_META_RESOURCES_DIR/../../../nextflow/picard_markduplicates/main.nf" VIASH_DEP_STRINGTIE="$VIASH_META_RESOURCES_DIR/../../../nextflow/stringtie/main.nf" -VIASH_DEP_BEDTOOLS_GENOMECOV="$VIASH_META_RESOURCES_DIR/../../../nextflow/bedtools_genomecov/main.nf" VIASH_DEP_UCSC_BEDCLIP="$VIASH_META_RESOURCES_DIR/../../../nextflow/ucsc/bedclip/main.nf" VIASH_DEP_UCSC_BEDGRAPHTOBIGWIG="$VIASH_META_RESOURCES_DIR/../../../nextflow/ucsc/bedgraphtobigwig/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_SORT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort/main.nf" @@ -1148,6 +1127,7 @@ VIASH_DEP_SAMTOOLS_SAMTOOLS_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox VIASH_DEP_SAMTOOLS_SAMTOOLS_STATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf" VIASH_DEP_SAMTOOLS_SAMTOOLS_FLAGSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf" 
VIASH_DEP_SAMTOOLS_SAMTOOLS_IDXSTATS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf" +VIASH_DEP_BEDTOOLS_BEDTOOLS_GENOMECOV="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf" ViashDebug "Running command: $(echo $VIASH_CMD)" cat << VIASHEOF | eval $VIASH_CMD @@ -1250,18 +1230,35 @@ workflow run_wf { // Genome-wide coverage with BEDTools - | bedtools_genomecov.run ( - runIf: { id, state -> !state.skip_bigwig }, - fromState: [ - "strandedness": "strandedness", - "bam": "processed_genome_bam", - "extra_bedtools_args": "extra_bedtools_args" - ], - toState: [ - "bedgraph_forward": "bedgraph_forward", - "bedgraph_reverse": "bedgraph_reverse" - ] - ) + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward" + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse" + ) | bedclip.run ( runIf: { id, state -> !state.skip_bigwig }, diff --git a/target/executable/workflows/pre_processing/.config.vsh.yaml b/target/executable/workflows/pre_processing/.config.vsh.yaml index 6534d1a..7f29af8 100644 --- a/target/executable/workflows/pre_processing/.config.vsh.yaml +++ b/target/executable/workflows/pre_processing/.config.vsh.yaml @@ -57,19 +57,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "file" - name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. 
To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "file" name: "--ribo_database_manifest" description: "Text file containing paths to fasta files (one per line) that will\ @@ -267,15 +254,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_trimgalore_args" - description: "Extra arguments to pass to Trim Galore! command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_trimmed_reads" description: "Minimum number of trimmed reads below which samples are removed\ @@ -308,19 +286,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" -- name: "Alignment options" - arguments: - - type: "string" - name: "--extra_salmon_quant_args" - description: "Extra arguments to pass to salmon quant command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Read filtering options" arguments: - type: "boolean_true" @@ -333,19 +298,6 @@ argument_groups: description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." info: null direction: "input" -- name: "Other options" - arguments: - - type: "string" - name: "--extra_fq_subsample_args" - description: "Extra arguments to pass to fq subsample command in addition to defaults\ - \ defined by the pipeline." 
- info: null - default: - - "--record-count 1000000 --seed 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Output" arguments: - type: "file" @@ -353,7 +305,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "$id.read_1.fastq" + - "${id}_r1.fastq.gz" must_exist: false create_parent: true required: false @@ -365,7 +317,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "$id.read_2.fastq" + - "${id}_r2.fastq.gz" must_exist: false create_parent: true required: false @@ -377,7 +329,7 @@ argument_groups: description: "FastQC HTML report for read 1." info: null default: - - "$id.read_1.fastqc.html" + - "${id}_r1.fastqc.html" must_exist: false create_parent: true required: false @@ -389,7 +341,7 @@ argument_groups: description: "FastQC HTML report for read 2." info: null default: - - "$id.read_2.fastqc.html" + - "${id}_r2.fastqc.html" must_exist: false create_parent: true required: false @@ -401,7 +353,7 @@ argument_groups: description: "FastQC report archive for read 1." info: null default: - - "$id.read_1.fastqc.zip" + - "${id}_r1.fastqc.zip" must_exist: false create_parent: true required: false @@ -413,7 +365,7 @@ argument_groups: description: "FastQC report archive for read 2." 
info: null default: - - "$id.read_2.fastqc.zip" + - "${id}_r2.fastqc.zip" must_exist: false create_parent: true required: false @@ -424,7 +376,7 @@ argument_groups: name: "--trim_log_1" info: null default: - - "$id.read_1.trimming_report.txt" + - "${id}_r1.trimming_report.txt" must_exist: false create_parent: true required: false @@ -435,7 +387,7 @@ argument_groups: name: "--trim_log_2" info: null default: - - "$id.read_2.trimming_report.txt" + - "${id}_r2.trimming_report.txt" must_exist: false create_parent: true required: false @@ -446,7 +398,7 @@ argument_groups: name: "--trim_html_1" info: null default: - - "$id.read_1.trimmed_fastqc.html" + - "${id}_r1.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -457,7 +409,7 @@ argument_groups: name: "--trim_html_2" info: null default: - - "$id.read_2.trimmed_fastqc.html" + - "${id}_r2.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -468,7 +420,7 @@ argument_groups: name: "--trim_zip_1" info: null default: - - "$id.read_1.trimmed_fastqc.zip" + - "${id}_r1.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -479,7 +431,7 @@ argument_groups: name: "--trim_zip_2" info: null default: - - "$id.read_2.trimmed_fastqc.zip" + - "${id}_r2.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -558,41 +510,48 @@ requirements: dependencies: - name: "fastqc" repository: - type: "local" -- name: "umitools/umitools_extract" - repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "umi_tools/umi_tools_extract" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "trimgalore" repository: - type: "local" -- name: "bbmap_bbsplit" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "bbmap/bbmap_bbsplit" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "sortmerna" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: 
"fastp" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "fq_subsample" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -673,17 +632,16 @@ build_info: output: "target/executable/workflows/pre_processing" executable: "target/executable/workflows/pre_processing/pre_processing" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - - "target/nextflow/fastqc" - - "target/nextflow/umitools/umitools_extract" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc" - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract" - - "target/nextflow/trimgalore" - - "target/nextflow/bbmap_bbsplit" - - "target/nextflow/sortmerna" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna" - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastp" - - "target/nextflow/fq_subsample" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample" - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" package_config: name: "rnaseq" @@ -695,7 +653,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/pre_processing/pre_processing 
b/target/executable/workflows/pre_processing/pre_processing index ad03fb0..4f45ec0 100755 --- a/target/executable/workflows/pre_processing/pre_processing +++ b/target/executable/workflows/pre_processing/pre_processing @@ -200,13 +200,6 @@ function ViashHelp { echo " type: file, file must exist" echo " BBsplit index" echo "" - echo " --bbsplit_fasta_list" - echo " type: file, file must exist" - echo " Path to comma-separated file containing a list of reference genomes to" - echo " filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must" - echo " be explicitly set to \"false\". The file should contain 2 (comma" - echo " separated) columns - short name and full path to reference genome(s)" - echo "" echo " --ribo_database_manifest" echo " type: file, file must exist" echo " Text file containing paths to fasta files (one per line) that will be" @@ -302,11 +295,6 @@ function ViashHelp { echo " choices: [ trimgalore, fastp ]" echo " Specify the trimming tool to use." echo "" - echo " --extra_trimgalore_args" - echo " type: string" - echo " Extra arguments to pass to Trim Galore! command in addition to defaults" - echo " defined by the pipeline." - echo "" echo " --min_trimmed_reads" echo " type: integer" echo " default: 10000" @@ -324,13 +312,6 @@ function ViashHelp { echo " default: false" echo " Save the trimmed FastQ files in the results directory." echo "" - echo "Alignment options:" - echo " --extra_salmon_quant_args" - echo " type: string" - echo " default:" - echo " Extra arguments to pass to salmon quant command in addition to defaults" - echo " defined by the pipeline." - echo "" echo "Read filtering options:" echo " --skip_bbsplit" echo " type: boolean_true" @@ -340,67 +321,60 @@ function ViashHelp { echo " type: boolean_true" echo " Enable the removal of reads derived from ribosomal RNA using SortMeRNA." 
echo "" - echo "Other options:" - echo " --extra_fq_subsample_args" - echo " type: string" - echo " default: --record-count 1000000 --seed 1" - echo " Extra arguments to pass to fq subsample command in addition to defaults" - echo " defined by the pipeline." - echo "" echo "Output:" echo " --qc_output1" echo " type: file, output" - echo " default: \$id.read_1.fastq" + echo " default: \${id}_r1.fastq.gz" echo " Path to output directory" echo "" echo " --qc_output2" echo " type: file, output" - echo " default: \$id.read_2.fastq" + echo " default: \${id}_r2.fastq.gz" echo " Path to output directory" echo "" echo " --fastqc_html_1" echo " type: file, output" - echo " default: \$id.read_1.fastqc.html" + echo " default: \${id}_r1.fastqc.html" echo " FastQC HTML report for read 1." echo "" echo " --fastqc_html_2" echo " type: file, output" - echo " default: \$id.read_2.fastqc.html" + echo " default: \${id}_r2.fastqc.html" echo " FastQC HTML report for read 2." echo "" echo " --fastqc_zip_1" echo " type: file, output" - echo " default: \$id.read_1.fastqc.zip" + echo " default: \${id}_r1.fastqc.zip" echo " FastQC report archive for read 1." echo "" echo " --fastqc_zip_2" echo " type: file, output" - echo " default: \$id.read_2.fastqc.zip" + echo " default: \${id}_r2.fastqc.zip" echo " FastQC report archive for read 2." 
echo "" echo " --trim_log_1" echo " type: file, output" - echo " default: \$id.read_1.trimming_report.txt" + echo " default: \${id}_r1.trimming_report.txt" echo "" echo " --trim_log_2" echo " type: file, output" - echo " default: \$id.read_2.trimming_report.txt" + echo " default: \${id}_r2.trimming_report.txt" echo "" echo " --trim_html_1" echo " type: file, output" - echo " default: \$id.read_1.trimmed_fastqc.html" + echo " default: \${id}_r1.trimmed_fastqc.html" echo "" echo " --trim_html_2" echo " type: file, output" - echo " default: \$id.read_2.trimmed_fastqc.html" + echo " default: \${id}_r2.trimmed_fastqc.html" echo "" echo " --trim_zip_1" echo " type: file, output" - echo " default: \$id.read_1.trimmed_fastqc.zip" + echo " default: \${id}_r1.trimmed_fastqc.zip" echo "" echo " --trim_zip_2" echo " type: file, output" - echo " default: \$id.read_2.trimmed_fastqc.zip" + echo " default: \${id}_r2.trimmed_fastqc.zip" echo "" echo " --sortmerna_log" echo " type: file, output" @@ -511,17 +485,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_BBSPLIT_INDEX=$(ViashRemoveFlags "$1") shift 1 ;; - --bbsplit_fasta_list) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --bbsplit_fasta_list=*) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list=*\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") - shift 1 - ;; --ribo_database_manifest) [ -n "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && ViashError Bad arguments for option \'--ribo_database_manifest\': \'$VIASH_PAR_RIBO_DATABASE_MANIFEST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_RIBO_DATABASE_MANIFEST="$2" @@ -709,17 +672,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_TRIMMER=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_trimgalore_args) - [ -n "$VIASH_PAR_EXTRA_TRIMGALORE_ARGS" ] && ViashError Bad arguments for option \'--extra_trimgalore_args\': \'$VIASH_PAR_EXTRA_TRIMGALORE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_TRIMGALORE_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_trimgalore_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_trimgalore_args=*) - [ -n "$VIASH_PAR_EXTRA_TRIMGALORE_ARGS" ] && ViashError Bad arguments for option \'--extra_trimgalore_args=*\': \'$VIASH_PAR_EXTRA_TRIMGALORE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_TRIMGALORE_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --min_trimmed_reads) [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_MIN_TRIMMED_READS="$2" @@ -753,17 +705,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_SAVE_TRIMMED=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_salmon_quant_args) - [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_salmon_quant_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_salmon_quant_args=*) - [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args=*\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --skip_bbsplit) [ -n "$VIASH_PAR_SKIP_BBSPLIT" ] && ViashError Bad arguments for option \'--skip_bbsplit\': \'$VIASH_PAR_SKIP_BBSPLIT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_SKIP_BBSPLIT=true @@ -774,17 +715,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_REMOVE_RIBO_RNA=true shift 1 ;; - --extra_fq_subsample_args) - [ -n "$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS" ] && ViashError Bad arguments for option \'--extra_fq_subsample_args\': \'$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_fq_subsample_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_fq_subsample_args=*) - [ -n "$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS" ] && ViashError Bad arguments for option \'--extra_fq_subsample_args=*\': \'$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --qc_output1) [ -n "$VIASH_PAR_QC_OUTPUT1" ] && ViashError Bad arguments for option \'--qc_output1\': \'$VIASH_PAR_QC_OUTPUT1\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 VIASH_PAR_QC_OUTPUT1="$2" @@ -1154,53 +1084,47 @@ fi if [ -z ${VIASH_PAR_SAVE_TRIMMED+x} ]; then VIASH_PAR_SAVE_TRIMMED="false" fi -if [ -z ${VIASH_PAR_EXTRA_SALMON_QUANT_ARGS+x} ]; then - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="" -fi if [ -z ${VIASH_PAR_SKIP_BBSPLIT+x} ]; then VIASH_PAR_SKIP_BBSPLIT="false" fi if [ -z ${VIASH_PAR_REMOVE_RIBO_RNA+x} ]; then VIASH_PAR_REMOVE_RIBO_RNA="false" fi -if [ -z ${VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS+x} ]; then - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS="--record-count 1000000 --seed 1" -fi if [ -z ${VIASH_PAR_QC_OUTPUT1+x} ]; then - VIASH_PAR_QC_OUTPUT1="\$id.read_1.fastq" + VIASH_PAR_QC_OUTPUT1="\${id}_r1.fastq.gz" fi if [ -z ${VIASH_PAR_QC_OUTPUT2+x} ]; then - VIASH_PAR_QC_OUTPUT2="\$id.read_2.fastq" + VIASH_PAR_QC_OUTPUT2="\${id}_r2.fastq.gz" fi if [ -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then - VIASH_PAR_FASTQC_HTML_1="\$id.read_1.fastqc.html" + VIASH_PAR_FASTQC_HTML_1="\${id}_r1.fastqc.html" fi if [ -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then - VIASH_PAR_FASTQC_HTML_2="\$id.read_2.fastqc.html" + VIASH_PAR_FASTQC_HTML_2="\${id}_r2.fastqc.html" fi if [ -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then - VIASH_PAR_FASTQC_ZIP_1="\$id.read_1.fastqc.zip" + VIASH_PAR_FASTQC_ZIP_1="\${id}_r1.fastqc.zip" fi if [ -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then - VIASH_PAR_FASTQC_ZIP_2="\$id.read_2.fastqc.zip" + VIASH_PAR_FASTQC_ZIP_2="\${id}_r2.fastqc.zip" fi if [ -z ${VIASH_PAR_TRIM_LOG_1+x} ]; then - VIASH_PAR_TRIM_LOG_1="\$id.read_1.trimming_report.txt" + VIASH_PAR_TRIM_LOG_1="\${id}_r1.trimming_report.txt" fi if [ -z ${VIASH_PAR_TRIM_LOG_2+x} ]; then - VIASH_PAR_TRIM_LOG_2="\$id.read_2.trimming_report.txt" + VIASH_PAR_TRIM_LOG_2="\${id}_r2.trimming_report.txt" fi if [ -z ${VIASH_PAR_TRIM_HTML_1+x} ]; then - VIASH_PAR_TRIM_HTML_1="\$id.read_1.trimmed_fastqc.html" + VIASH_PAR_TRIM_HTML_1="\${id}_r1.trimmed_fastqc.html" fi if [ -z ${VIASH_PAR_TRIM_HTML_2+x} ]; then - VIASH_PAR_TRIM_HTML_2="\$id.read_2.trimmed_fastqc.html" + 
VIASH_PAR_TRIM_HTML_2="\${id}_r2.trimmed_fastqc.html" fi if [ -z ${VIASH_PAR_TRIM_ZIP_1+x} ]; then - VIASH_PAR_TRIM_ZIP_1="\$id.read_1.trimmed_fastqc.zip" + VIASH_PAR_TRIM_ZIP_1="\${id}_r1.trimmed_fastqc.zip" fi if [ -z ${VIASH_PAR_TRIM_ZIP_2+x} ]; then - VIASH_PAR_TRIM_ZIP_2="\$id.read_2.trimmed_fastqc.zip" + VIASH_PAR_TRIM_ZIP_2="\${id}_r2.trimmed_fastqc.zip" fi if [ -z ${VIASH_PAR_SORTMERNA_LOG+x} ]; then VIASH_PAR_SORTMERNA_LOG="\$id.sortmerna.log" @@ -1224,10 +1148,6 @@ if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX" ]; t ViashError "Input file '$VIASH_PAR_BBSPLIT_INDEX' does not exist." exit 1 fi -if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && [ ! -e "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - ViashError "Input file '$VIASH_PAR_BBSPLIT_FASTA_LIST' does not exist." - exit 1 -fi if [ ! -z "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ] && [ ! -e "$VIASH_PAR_RIBO_DATABASE_MANIFEST" ]; then ViashError "Input file '$VIASH_PAR_RIBO_DATABASE_MANIFEST' does not exist." exit 1 @@ -1504,14 +1424,13 @@ fi # set dependency paths -VIASH_DEP_FASTQC="$VIASH_META_RESOURCES_DIR/../../../nextflow/fastqc/main.nf" -VIASH_DEP_UMITOOLS_UMITOOLS_EXTRACT="$VIASH_META_RESOURCES_DIR/../../../nextflow/umitools/umitools_extract/main.nf" -VIASH_DEP_TRIMGALORE="$VIASH_META_RESOURCES_DIR/../../../nextflow/trimgalore/main.nf" -VIASH_DEP_BBMAP_BBSPLIT="$VIASH_META_RESOURCES_DIR/../../../nextflow/bbmap_bbsplit/main.nf" -VIASH_DEP_SORTMERNA="$VIASH_META_RESOURCES_DIR/../../../nextflow/sortmerna/main.nf" -VIASH_DEP_FQ_SUBSAMPLE="$VIASH_META_RESOURCES_DIR/../../../nextflow/fq_subsample/main.nf" +VIASH_DEP_FASTQC="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf" VIASH_DEP_UMI_TOOLS_UMI_TOOLS_EXTRACT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf" +VIASH_DEP_TRIMGALORE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf" 
+VIASH_DEP_BBMAP_BBMAP_BBSPLIT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf" +VIASH_DEP_SORTMERNA="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf" VIASH_DEP_FASTP="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf" +VIASH_DEP_FQ_SUBSAMPLE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf" VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" ViashDebug "Running command: $(echo $VIASH_CMD)" @@ -1546,48 +1465,58 @@ workflow run_wf { [ id, state + [paired: paired, input: input] ] } - // Perform QC on input fastq files | fastqc.run ( runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "fastqc_html_1": "fastqc_html_1", - "fastqc_html_2": "fastqc_html_2", - "fastqc_zip_1": "fastqc_zip_1", - "fastqc_zip_2": "fastqc_zip_2" - ] + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"] ) // Extract UMIs from fastq files and discard read 1 or read 2 if required - | umitools_extract.run ( + | umi_tools_extract.run ( runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - def bc_pattern = state.paired ? 
[ state.umitools_bc_pattern, state.umitools_bc_pattern2 ] : [ state.umitools_bc_pattern ] - [ paired: state.paired, - input: input, - bc_pattern: bc_pattern, - umi_discard_read: state.umi_discard_read ] + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "\${id}.r1.fastq.gz" + def read2_out = state.paired ? "\${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] }, toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2" + "fastq_1": "output", + "fastq_2": "read2_out" ] ) // Discard read if required | map { id, state -> def paired = state.paired + def fastq_1 = state.fastq_1 def fastq_2 = state.fastq_2 if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { - fastq_2 = state.remove(state.fastq_2) + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) paired = false } - [ id, state + [paired: paired, fastq_2: fastq_2] ] + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] } // Trim reads using Trim galore! @@ -1597,8 +1526,11 @@ workflow run_wf { def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - min_trimmed_reads: state.min_trimmed_reads ] + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] }, + args: [gzip: true, fastqc: true], toState: [ "fastq_1": "trimmed_r1", "fastq_2": "trimmed_r2", @@ -1608,21 +1540,22 @@ workflow run_wf { "trim_zip_2": "trimmed_fastqc_zip_2", "trim_html_1": "trimmed_fastqc_html_1", "trim_html_2": "trimmed_fastqc_html_2" - ], - args: [gzip: true, fastqc: true] + ] ) // Trim reads using fastp | fastp.run( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, - fromState: [ - "in1": "fastq_1", - "in2": "fastq_2", - "merge": "fastp_save_merged", - "interleaved_in": "interleaved_reads", - "detect_adapter_for_pe": "fastp_pe_detect_adapter", - "adapter_fasta": "fastp_adapter_fasta" - ], + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, toState: [ "fastq_1": "out1", "fastq_2": "out2", @@ -1636,19 +1569,23 @@ workflow run_wf { ) // Perform FASTQC on reads trimmed using fastp - | fastqc.run( + | fastqc.run ( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, fromState: { id, state -> def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "trim_html_1": "fastqc_html_1", - "trim_html_2": "fastqc_html_2", - "trim_zip_1": "fastqc_zip_1", - "trim_zip_2": "fastqc_zip_2" - ], + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], key: "fastqc_trimming" ) @@ -1659,7 +1596,7 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - built_bbsplit_index: state.bbsplit_index ] + build: state.bbsplit_index ] }, args: ["only_build_index": false], toState: [ @@ -1675,27 +1612,44 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] def filePaths = state.ribo_database_manifest.readLines() def refs = filePaths.collect { it } - [ paired: state.paired, + def other = "\${id}_non_rRNA_reads/" + [ paired_in: state.paired, input: input, - ribo_database_manifest: refs ] + ref: refs, + out2: state.paired, + other: other ] }, - toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2", - "sortmerna_log": "sortmerna_log" - ] + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } // Sub-sample FastQ 
files and pseudo-align with Salmon to auto-infer strandedness | fq_subsample.run ( runIf: { id, state -> state.strandedness == 'auto' }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ - input: input, - extra_args: state.extra_fq_subsample_args - ] + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState }, + args: [ + record_count: 1000, + seed: 1 + ], toState: [ "subsampled_fastq_1": "output_1", "subsampled_fastq_2": "output_2" @@ -1721,6 +1675,7 @@ workflow run_wf { ) [ id, state + [lib_type: lib_type] ] } + | salmon_quant.run ( runIf: { id, state -> state.strandedness == 'auto' }, fromState: { id, state -> @@ -1738,17 +1693,17 @@ workflow run_wf { toState: [ "salmon_quant_output": "output" ] ) - | map { id, state -> - def mod_state = (!state.paired) ? - [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : - [] - [ id, state + mod_state ] - } + | map { id, state -> + def mod_state = (!state.paired) ? 
+ [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } - | map { id, state -> - def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } - [ id, mod_state ] - } + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } | setState ( "fastqc_html_1": "fastqc_html_1", @@ -1764,9 +1719,6 @@ workflow run_wf { "trim_html_1": "trim_html_1", "trim_html_2": "trim_html_2", "sortmerna_log": "sortmerna_log", - "failed_trim": "failed_trim", - "failed_trim_unpaired1": "failed_trim_unpaired1", - "failed_trim_unpaired2": "failed_trim_unpaired2", "trim_json": "trim_json", "trim_html": "trim_html", "trim_merged_out": "trim_merged_out", diff --git a/target/executable/workflows/prepare_genome/.config.vsh.yaml b/target/executable/workflows/prepare_genome/.config.vsh.yaml index 5849d7b..f80f17f 100644 --- a/target/executable/workflows/prepare_genome/.config.vsh.yaml +++ b/target/executable/workflows/prepare_genome/.config.vsh.yaml @@ -87,16 +87,14 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." 
info: null must_exist: true create_parent: true required: false direction: "input" - multiple: false + multiple: true multiple_sep: ";" - type: "file" name: "--star_index" @@ -126,15 +124,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "extra_rsem_prepare_reference_args" - description: "Extra arguments to pass to rsem-prepare-reference command in addition\ - \ to defaults defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "file" name: "--salmon_index" description: "Path to directory or tar.gz archive for pre-built Salmon index." @@ -382,7 +371,7 @@ dependencies: - name: "gffread" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "cat_additional_fasta" repository: @@ -399,7 +388,7 @@ dependencies: - name: "rsem/rsem_prepare_reference" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "getchromsizes" repository: @@ -412,23 +401,27 @@ dependencies: - name: "star/star_genome_generate" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "bbmap_bbsplit" +- name: "bbmap/bbmap_bbsplit" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "kallisto/kallisto_index" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -509,8 +502,8 @@ build_info: output: "target/executable/workflows/prepare_genome" executable: "target/executable/workflows/prepare_genome/prepare_genome" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: 
"0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/gunzip" - "target/dependencies/vsh/vsh/biobox/main/nextflow/gffread" @@ -522,9 +515,9 @@ build_info: - "target/nextflow/getchromsizes" - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar" - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate" - - "target/nextflow/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit" - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index" - - "target/nextflow/kallisto/kallisto_index" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index" package_config: name: "rnaseq" version: "main" @@ -535,7 +528,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/prepare_genome/prepare_genome b/target/executable/workflows/prepare_genome/prepare_genome index ef62ac1..f8fcf0b 100755 --- a/target/executable/workflows/prepare_genome/prepare_genome +++ b/target/executable/workflows/prepare_genome/prepare_genome @@ -212,11 +212,9 @@ function ViashHelp { echo " Skip BBSplit for removal of non-reference genome reads." echo "" echo " --bbsplit_fasta_list" - echo " type: file, file must exist" - echo " Path to comma-separated file containing a list of reference genomes to" - echo " filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must" - echo " be explicitly set to \"false\". The file should contain 2 (comma" - echo " separated) columns - short name and full path to reference genome(s)" + echo " type: file, multiple values allowed, file must exist" + echo " List of reference genomes (separated by \";\") to filter reads against" + echo " with BBSplit." 
echo "" echo " --star_index" echo " type: file, file must exist" @@ -230,11 +228,6 @@ function ViashHelp { echo " type: file, file must exist" echo " Path to directory or tar.gz archive for pre-built RSEM index." echo "" - echo " extra_rsem_prepare_reference_args" - echo " type: string" - echo " Extra arguments to pass to rsem-prepare-reference command in addition to" - echo " defaults defined by the pipeline." - echo "" echo " --salmon_index" echo " type: file, file must exist" echo " Path to directory or tar.gz archive for pre-built Salmon index." @@ -454,14 +447,20 @@ while [[ $# -gt 0 ]]; do shift 1 ;; --bbsplit_fasta_list) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;""$2" + fi [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; --bbsplit_fasta_list=*) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list=*\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;"$(ViashRemoveFlags "$1") + fi shift 1 ;; --star_index) @@ -824,12 +823,6 @@ if [ -z "$VIASH_META_CPUS" ]; then fi -# storing leftover values in positionals -if [[ $# -gt 0 ]]; then - VIASH_PAR_EXTRA_RSEM_PREPARE_REFERENCE_ARGS="$1" - shift 1 -fi - # check whether required parameters exist if [ -z ${VIASH_PAR_FASTA+x} ]; then ViashError '--fasta' is a required argument. Use "--help" to get more information on the parameters. @@ -936,9 +929,17 @@ if [ ! -z "$VIASH_PAR_SPLICESITES" ] && [ ! -e "$VIASH_PAR_SPLICESITES" ]; then ViashError "Input file '$VIASH_PAR_SPLICESITES' does not exist." exit 1 fi -if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && [ ! -e "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - ViashError "Input file '$VIASH_PAR_BBSPLIT_FASTA_LIST' does not exist." - exit 1 +if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_BBSPLIT_FASTA_LIST; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f fi if [ ! -z "$VIASH_PAR_STAR_INDEX" ] && [ ! -e "$VIASH_PAR_STAR_INDEX" ]; then ViashError "Input file '$VIASH_PAR_STAR_INDEX' does not exist." 
@@ -1142,13 +1143,13 @@ VIASH_DEP_GTF2BED="$VIASH_META_RESOURCES_DIR/../../../nextflow/gtf2bed/main.nf" VIASH_DEP_PREPROCESS_TRANSCRIPTS_FASTA="$VIASH_META_RESOURCES_DIR/../../../nextflow/preprocess_transcripts_fasta/main.nf" VIASH_DEP_GTF_FILTER="$VIASH_META_RESOURCES_DIR/../../../nextflow/gtf_filter/main.nf" VIASH_DEP_GETCHROMSIZES="$VIASH_META_RESOURCES_DIR/../../../nextflow/getchromsizes/main.nf" -VIASH_DEP_BBMAP_BBSPLIT="$VIASH_META_RESOURCES_DIR/../../../nextflow/bbmap_bbsplit/main.nf" -VIASH_DEP_KALLISTO_KALLISTO_INDEX="$VIASH_META_RESOURCES_DIR/../../../nextflow/kallisto/kallisto_index/main.nf" VIASH_DEP_GFFREAD="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/gffread/main.nf" VIASH_DEP_RSEM_RSEM_PREPARE_REFERENCE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_prepare_reference/main.nf" VIASH_DEP_UNTAR="$VIASH_TARGET_DIR/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf" VIASH_DEP_STAR_STAR_GENOME_GENERATE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf" +VIASH_DEP_BBMAP_BBMAP_BBSPLIT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf" VIASH_DEP_SALMON_SALMON_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf" +VIASH_DEP_KALLISTO_KALLISTO_INDEX="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf" ViashDebug "Running command: $(echo $VIASH_CMD)" cat << VIASHEOF | eval $VIASH_CMD @@ -1308,43 +1309,45 @@ workflow run_wf { [ id, state + [transcript_fasta: transcript_fasta] ] } - // chromosome size and fai index - | getchromsizes.run ( - fromState: [ "fasta": "fasta" ], - toState: [ - "fai": "fai", - "sizes": "sizes" - ], - key: "chromsizes", - args: [ - fai: "genome_additional.fasta.fai", - sizes: "genome_additional.fasta.sizes" - ] - ) - - // untar bbsplit index, if available - | untar.run ( - runIf: {id, state -> state.bbsplit_index}, - fromState: 
[ "input": "bbsplit_index" ], - toState: [ "bbsplit_index": "output" ], - key: "untar_bbsplit_index", - args: [output: "BBSplit_index"] - ) - - // create bbsplit index, if not already availble - | bbmap_bbsplit.run ( - runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, - fromState: [ - "primary_ref": "fasta", - "bbsplit_fasta_list": "bbsplit_fasta_list" - ], - toState: [ "bbsplit_index": "bbsplit_index" ], - args: [ - only_build_index: true, - bbsplit_index: "BBSplit_index" - ], - key: "generate_bbsplit_index" - ) + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: [ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + args: [output: "BBSplit_index"] + ) + + | map {id, state -> + def ref = [state.fasta] + state.bbsplit_fasta_list + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already availble + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) // Uncompress STAR index or generate from scratch if required | untar.run ( @@ -1421,16 +1424,16 @@ workflow run_wf { args: [output: "Kallisto_index"] ) - | kallisto_index.run( - runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, - fromState: [ - "transcriptome_fasta": "transcript_fasta", - "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size" - ], - toState: [ "kallisto_index": "kallisto_index" ], - key: "generate_kallisto_index", - args: [kallisto_index: 
"Kallisto_index"] - ) + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"] + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml index 47ebc95..f2ad543 100644 --- a/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml +++ b/target/executable/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -116,7 +116,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length" description: "For single-end mode only, the estimated average fragment length\ \ to use for quantification with Kallisto." @@ -125,7 +125,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length_sd" description: "For single-end mode only, the estimated standard deviation of the\ \ fragment length for quantification with Kallisto." 
@@ -194,15 +194,17 @@ dependencies: - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "kallisto/kallisto_quant" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -283,11 +285,11 @@ build_info: output: "target/executable/workflows/pseudo_alignment_and_quant" executable: "target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - - "target/nextflow/kallisto/kallisto_quant" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant" package_config: name: "rnaseq" version: "main" @@ -298,7 +300,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant index bdd7eeb..40bfd44 100755 --- a/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant +++ b/target/executable/workflows/pseudo_alignment_and_quant/pseudo_alignment_and_quant @@ -227,12 +227,12 @@ function ViashHelp { echo " object" echo "" echo " --kallisto_quant_fragment_length" - echo " type: integer" + echo " type: double" echo " For single-end mode only, the estimated average fragment length to use" echo " for quantification with Kallisto." 
echo "" echo " --kallisto_quant_fragment_length_sd" - echo " type: integer" + echo " type: double" echo " For single-end mode only, the estimated standard deviation of the" echo " fragment length for quantification with Kallisto." echo "" @@ -645,14 +645,14 @@ fi # check whether parameters values are of the right type if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ]]; then - if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--kallisto_quant_fragment_length' has to be an integer. Use "--help" to get more information on the parameters. + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length' has to be a double. Use "--help" to get more information on the parameters. exit 1 fi fi if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ]]; then - if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--kallisto_quant_fragment_length_sd' has to be an integer. Use "--help" to get more information on the parameters. + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length_sd' has to be a double. Use "--help" to get more information on the parameters. 
exit 1 fi fi @@ -779,8 +779,8 @@ fi # set dependency paths -VIASH_DEP_KALLISTO_KALLISTO_QUANT="$VIASH_META_RESOURCES_DIR/../../../nextflow/kallisto/kallisto_quant/main.nf" VIASH_DEP_SALMON_SALMON_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" +VIASH_DEP_KALLISTO_KALLISTO_QUANT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf" ViashDebug "Running command: $(echo $VIASH_CMD)" cat << VIASHEOF | eval $VIASH_CMD @@ -859,22 +859,32 @@ workflow run_wf { [ id, mod_state ] } - | kallisto_quant.run ( - runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, - fromState: [ - "input": "input", - "paired": "paired", - "gtf": "gtf", - "index": "kallisto_index", - "fragment_length": "kallisto_quant_fragment_length", - "fragment_length_sd": "kallisto_quant_fragment_length_sd" - ], - toState: [ - "quant_out_dir": "output", - "kallisto_quant_results_file": "quant_results_file", - "pseudo_multiqc": "log" + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, ] - ) + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def newKeys = [ + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/executable/workflows/quality_control/.config.vsh.yaml 
b/target/executable/workflows/quality_control/.config.vsh.yaml index e5d254c..2271736 100644 --- a/target/executable/workflows/quality_control/.config.vsh.yaml +++ b/target/executable/workflows/quality_control/.config.vsh.yaml @@ -281,15 +281,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_featurecounts_args" - description: "Extra arguments to pass to featureCounts command in addition to\ - \ defaults defined by the pipeline" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--rseqc_modules" description: "Specify the RSeQC modules to run_wf" @@ -472,19 +463,6 @@ argument_groups: \ to determine tin. Only use this option if there are substantial intronic reads." info: null direction: "input" - - type: "string" - name: "--output_format" - description: "Format of the qualimap output report (PDF or HTML, default is HTML)" - info: null - default: - - "html" - required: false - choices: - - "html" - - "pdf" - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--pr_bases" description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ @@ -1127,21 +1105,33 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_pdf" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." info: null - default: - - "$id.qualimap_output.pdf" - must_exist: false + example: + - "$id.rnaseq_qc_results.txt" + must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_dir" + name: "--qualimap_counts" + description: "Output file for computed counts." 
info: null - default: - - "$id.qualimap_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + example: + - "$id.report.html" must_exist: true create_parent: true required: false @@ -1416,13 +1406,19 @@ requirements: dependencies: - name: "rseqc/rseqc_bamstat" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "rseqc/rseqc_inferexperiment" repository: - type: "local" -- name: "rseqc/rseqc_innerdistance" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "rseqc/rseqc_inner_distance" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "rseqc/rseqc_junctionannotation" repository: type: "local" @@ -1441,16 +1437,18 @@ dependencies: - name: "dupradar" repository: type: "local" -- name: "qualimap" +- name: "qualimap/qualimap_rnaseq" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "preseq_lcextrap" repository: type: "local" - name: "featurecounts" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "multiqc_custom_biotype" repository: @@ -1464,9 +1462,9 @@ dependencies: - name: "multiqc" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "rsem/rsem_merge_counts" +- name: "rsem_merge_counts" repository: type: "local" - name: "workflows/merge_quant_results" @@ -1475,7 +1473,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -1556,26 +1554,26 @@ build_info: output: "target/executable/workflows/quality_control" executable: "target/executable/workflows/quality_control/quality_control" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - - "target/nextflow/rseqc/rseqc_bamstat" - - "target/nextflow/rseqc/rseqc_inferexperiment" - - "target/nextflow/rseqc/rseqc_innerdistance" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance" - "target/nextflow/rseqc/rseqc_junctionannotation" - "target/nextflow/rseqc/rseqc_junctionsaturation" - "target/nextflow/rseqc/rseqc_readdistribution" - "target/nextflow/rseqc/rseqc_readduplication" - "target/nextflow/rseqc/rseqc_tin" - "target/nextflow/dupradar" - - "target/nextflow/qualimap" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq" - "target/nextflow/preseq_lcextrap" - "target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts" - "target/nextflow/multiqc_custom_biotype" - "target/nextflow/deseq2_qc" - "target/nextflow/prepare_multiqc_input" - "target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc" - - "target/nextflow/rsem/rsem_merge_counts" + - "target/nextflow/rsem_merge_counts" - "target/nextflow/workflows/merge_quant_results" package_config: name: "rnaseq" @@ -1587,7 +1585,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/quality_control/quality_control b/target/executable/workflows/quality_control/quality_control index 95da924..c1cd030 100755 --- a/target/executable/workflows/quality_control/quality_control +++ b/target/executable/workflows/quality_control/quality_control @@ -313,11 +313,6 @@ function ViashHelp { echo " Biotype value to 
use while appending entries to GTF file when additional" echo " fasta file is provided." echo "" - echo " --extra_featurecounts_args" - echo " type: string" - echo " Extra arguments to pass to featureCounts command in addition to defaults" - echo " defined by the pipeline" - echo "" echo " --rseqc_modules" echo " type: string, multiple values allowed" echo " default:" @@ -424,12 +419,6 @@ function ViashHelp { echo " determine tin. Only use this option if there are substantial intronic" echo " reads." echo "" - echo " --output_format" - echo " type: string" - echo " default: html" - echo " choices: [ html, pdf ]" - echo " Format of the qualimap output report (PDF or HTML, default is HTML)" - echo "" echo " --pr_bases" echo " type: integer" echo " default: 100" @@ -701,13 +690,19 @@ function ViashHelp { echo " type: file, output, file must exist" echo " default: \$id.intercept_slope.txt" echo "" - echo " --qualimap_output_pdf" - echo " type: file, output" - echo " default: \$id.qualimap_output.pdf" - echo "" - echo " --qualimap_output_dir" + echo " --qualimap_qc_report" echo " type: file, output, file must exist" - echo " default: \$id.qualimap_output" + echo " example: \$id.rnaseq_qc_results.txt" + echo " Text file containing the RNAseq QC results." + echo "" + echo " --qualimap_counts" + echo " type: file, output, file must exist" + echo " Output file for computed counts." + echo "" + echo " --qualimap_report" + echo " type: file, output, file must exist" + echo " example: \$id.report.html" + echo " Report output file. Supported formats are PDF or HTML." 
echo "" echo " --deseq2_output" echo " type: file, output, file must exist" @@ -1146,17 +1141,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_BIOTYPE=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_featurecounts_args) - [ -n "$VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS" ] && ViashError Bad arguments for option \'--extra_featurecounts_args\': \'$VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_featurecounts_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_featurecounts_args=*) - [ -n "$VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS" ] && ViashError Bad arguments for option \'--extra_featurecounts_args=*\': \'$VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FEATURECOUNTS_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --rseqc_modules) if [ -z "$VIASH_PAR_RSEQC_MODULES" ]; then VIASH_PAR_RSEQC_MODULES="$2" @@ -1322,17 +1306,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_SUBTRACT_BACKGROUND=true shift 1 ;; - --output_format) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_FORMAT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_format. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output_format=*) - [ -n "$VIASH_PAR_OUTPUT_FORMAT" ] && ViashError Bad arguments for option \'--output_format=*\': \'$VIASH_PAR_OUTPUT_FORMAT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_OUTPUT_FORMAT=$(ViashRemoveFlags "$1") - shift 1 - ;; --pr_bases) [ -n "$VIASH_PAR_PR_BASES" ] && ViashError Bad arguments for option \'--pr_bases\': \'$VIASH_PAR_PR_BASES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_PR_BASES="$2" @@ -1987,26 +1960,37 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE=$(ViashRemoveFlags "$1") shift 1 ;; - --qualimap_output_pdf) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--qualimap_output_pdf\': \'$VIASH_PAR_QUALIMAP_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_PDF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_output_pdf. Use "--help" to get more information on the parameters. && exit 1 + --qualimap_qc_report) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_qc_report. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; - --qualimap_output_pdf=*) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--qualimap_output_pdf=*\': \'$VIASH_PAR_QUALIMAP_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_PDF=$(ViashRemoveFlags "$1") + --qualimap_qc_report=*) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report=*\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT=$(ViashRemoveFlags "$1") shift 1 ;; - --qualimap_output_dir) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--qualimap_output_dir\': \'$VIASH_PAR_QUALIMAP_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_output_dir. Use "--help" to get more information on the parameters. && exit 1 + --qualimap_counts) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_counts. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; - --qualimap_output_dir=*) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--qualimap_output_dir=*\': \'$VIASH_PAR_QUALIMAP_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_DIR=$(ViashRemoveFlags "$1") + --qualimap_counts=*) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts=*\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_report) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_report. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --qualimap_report=*) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report=*\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT=$(ViashRemoveFlags "$1") shift 1 ;; --deseq2_output) @@ -2488,9 +2472,6 @@ fi if [ -z ${VIASH_PAR_SUBTRACT_BACKGROUND+x} ]; then VIASH_PAR_SUBTRACT_BACKGROUND="false" fi -if [ -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then - VIASH_PAR_OUTPUT_FORMAT="html" -fi if [ -z ${VIASH_PAR_PR_BASES+x} ]; then VIASH_PAR_PR_BASES="100" fi @@ -2602,12 +2583,6 @@ fi if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="\$id.intercept_slope.txt" fi -if [ -z ${VIASH_PAR_QUALIMAP_OUTPUT_PDF+x} ]; then - VIASH_PAR_QUALIMAP_OUTPUT_PDF="\$id.qualimap_output.pdf" -fi -if [ -z ${VIASH_PAR_QUALIMAP_OUTPUT_DIR+x} ]; then - VIASH_PAR_QUALIMAP_OUTPUT_DIR="\$id.qualimap_output" -fi if [ -z ${VIASH_PAR_DESEQ2_OUTPUT+x} ]; then VIASH_PAR_DESEQ2_OUTPUT="deseq2" fi @@ -3093,18 +3068,6 @@ if [ ! -z "$VIASH_PAR_RSEQC_MODULES" ]; then unset IFS fi -if [ ! -z "$VIASH_PAR_OUTPUT_FORMAT" ]; then - VIASH_PAR_OUTPUT_FORMAT_CHOICES=("html;pdf") - IFS=';' - set -f - if ! [[ ";${VIASH_PAR_OUTPUT_FORMAT_CHOICES[*]};" =~ ";$VIASH_PAR_OUTPUT_FORMAT;" ]]; then - ViashError '--output_format' specified value of \'$VIASH_PAR_OUTPUT_FORMAT\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - if [ ! -z "$VIASH_PAR_SEQUENCING_PROTOCOL" ]; then VIASH_PAR_SEQUENCING_PROTOCOL_CHOICES=("non-strand-specific;strand-specific-reverse;strand-specific-forward") IFS=';' @@ -3211,11 +3174,14 @@ fi if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" ]; then mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" fi -if [ ! 
-z "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_PDF")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_PDF")" +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" fi -if [ ! -z "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_DIR")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_DIR")" +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" fi if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" ]; then mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" @@ -3298,22 +3264,22 @@ fi # set dependency paths -VIASH_DEP_RSEQC_RSEQC_BAMSTAT="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_bamstat/main.nf" -VIASH_DEP_RSEQC_RSEQC_INFEREXPERIMENT="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_inferexperiment/main.nf" -VIASH_DEP_RSEQC_RSEQC_INNERDISTANCE="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_innerdistance/main.nf" VIASH_DEP_RSEQC_RSEQC_JUNCTIONANNOTATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_junctionannotation/main.nf" VIASH_DEP_RSEQC_RSEQC_JUNCTIONSATURATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_junctionsaturation/main.nf" VIASH_DEP_RSEQC_RSEQC_READDISTRIBUTION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_readdistribution/main.nf" VIASH_DEP_RSEQC_RSEQC_READDUPLICATION="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_readduplication/main.nf" VIASH_DEP_RSEQC_RSEQC_TIN="$VIASH_META_RESOURCES_DIR/../../../nextflow/rseqc/rseqc_tin/main.nf" 
VIASH_DEP_DUPRADAR="$VIASH_META_RESOURCES_DIR/../../../nextflow/dupradar/main.nf" -VIASH_DEP_QUALIMAP="$VIASH_META_RESOURCES_DIR/../../../nextflow/qualimap/main.nf" VIASH_DEP_PRESEQ_LCEXTRAP="$VIASH_META_RESOURCES_DIR/../../../nextflow/preseq_lcextrap/main.nf" VIASH_DEP_MULTIQC_CUSTOM_BIOTYPE="$VIASH_META_RESOURCES_DIR/../../../nextflow/multiqc_custom_biotype/main.nf" VIASH_DEP_DESEQ2_QC="$VIASH_META_RESOURCES_DIR/../../../nextflow/deseq2_qc/main.nf" VIASH_DEP_PREPARE_MULTIQC_INPUT="$VIASH_META_RESOURCES_DIR/../../../nextflow/prepare_multiqc_input/main.nf" -VIASH_DEP_RSEM_RSEM_MERGE_COUNTS="$VIASH_META_RESOURCES_DIR/../../../nextflow/rsem/rsem_merge_counts/main.nf" +VIASH_DEP_RSEM_MERGE_COUNTS="$VIASH_META_RESOURCES_DIR/../../../nextflow/rsem_merge_counts/main.nf" VIASH_DEP_WORKFLOWS_MERGE_QUANT_RESULTS="$VIASH_META_RESOURCES_DIR/../../../nextflow/workflows/merge_quant_results/main.nf" +VIASH_DEP_RSEQC_RSEQC_BAMSTAT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf" +VIASH_DEP_RSEQC_RSEQC_INFEREXPERIMENT="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf" +VIASH_DEP_RSEQC_RSEQC_INNER_DISTANCE="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf" +VIASH_DEP_QUALIMAP_QUALIMAP_RNASEQ="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf" VIASH_DEP_FEATURECOUNTS="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf" VIASH_DEP_MULTIQC="$VIASH_TARGET_DIR/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf" @@ -3376,145 +3342,145 @@ workflow run_wf { ] ) - | multiqc_custom_biotype.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, - fromState: [ - "id": "id", - "biocounts": "featurecounts", - "biotypes_header": "biotypes_header" - ], - toState: [ - "featurecounts_multiqc": 
"featurecounts_multiqc", - "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" - ] - ) - - | preseq_lcextrap.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, - fromState: [ - "paired": "paired", - "input": "genome_bam", - "extra_preseq_args": "extra_preseq_args" - ], - toState: [ "preseq_output": "output" ] - ) - - | rseqc_bamstat.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "map_qual": "map_qual" - ], - toState: [ "bamstat_output": "output" ] - ) - | rseqc_inferexperiment.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual" - ], - toState: [ "strandedness_output": "output" ] - ) - // Get predicted strandedness from the RSeQC infer_experiment.py output - | map { id, state -> - def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) - def passed_strand_check = (state.strandedness != inferred_strand[0]) ? 
false : true - [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] - } - | rseqc_innerdistance.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, - key: "inner_distance", - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual", - "lower_bound_size": "lower_bound_size", - "upper_bound_size": "upper_bound_size", - "step_size": "step_size" - ], - toState: [ - "inner_dist_output_stats": "output_stats", - "inner_dist_output_dist": "output_dist", - "inner_dist_output_freq": "output_freq", - "inner_dist_output_plot": "output_plot", - "inner_dist_output_plot_r": "output_plot_r" - ] - ) - | rseqc_junctionannotation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "map_qual": "map_qual", - "min_intron": "min_intron" - ], - toState: [ - "junction_annotation_output_log": "output_log", - "junction_annotation_output_plot_r": "output_plot_r", - "junction_annotation_output_junction_bed": "output_junction_bed", - "junction_annotation_output_junction_interact": "output_junction_interact", - "junction_annotation_output_junction_sheet": "output_junction_sheet", - "junction_annotation_output_splice_events_plot": "output_splice_events_plot", - "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" - ] - ) - | rseqc_junctionsaturation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", - "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", - "sampling_percentile_step": 
"sampling_percentile_step", - "min_intron": "min_intron", - "min_splice_read": "min_splice_read", - "map_qual": "map_qual" - ], - toState: [ - "junction_saturation_output_plot_r": "output_plot_r", - "junction_saturation_output_plot": "output_plot" - ] - ) - | rseqc_readdistribution.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - ], - toState: [ "read_distribution_output": "output" ] - ) - | rseqc_readduplication.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "read_count_upper_limit": "read_count_upper_limit", - "map_qual": "map_qual" - ], - toState: [ - "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", - "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", - "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", - "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" - ] - ) - | rseqc_tin.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "bam_input": "genome_bam", - "bai_input": "genome_bam_index", - "refgene": "gene_bed", - "minimum_coverage": "minimum_coverage", - "sample_size": "tin_sample_size", - "subtract_background": "subtract_background" - ], - toState: [ - "tin_output_summary": "output_tin_summary", - "tin_output_metrics": "output_tin" - ] - ) + | multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": 
"featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? 
false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size" + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": 
"sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ] + ) | dupradar.run( runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, @@ -3536,23 +3502,25 @@ workflow run_wf { ] ) - | qualimap.run( - runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "gtf": "gtf", - "pr_bases": 
"pr_bases", - "tr_bias": "tr_bias", - "algorithm": "algorithm", - "sequencing_protocol": "sequencing_protocol", - "sorted": "sorted", - "java_memory_size": "java_memory_size", - ], - toState: [ - "qualimap_output_pdf": "output_pdf", - "qualimap_output_dir": "output_dir" - ] - ) + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ] + ) merged_ch = qc_ch | toSortedList @@ -3675,10 +3643,10 @@ workflow run_wf { (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? state.preseq_output : null } - def qualimap_output_dir = list.collect { id, state -> - (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? - state.qualimap_output_dir : - null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } def dupradar_output_dup_intercept_mqc = list.collect { id, state -> (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? 
state.dupradar_output_dup_intercept_mqc : @@ -3763,7 +3731,7 @@ workflow run_wf { featurecounts_multiqc: featurecounts_multiqc, featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, preseq_output: preseq_output, - qualimap_output_dir: qualimap_output_dir, + // qualimap_output_dir: qualimap_output_dir, dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, bamstat_output: bamstat_output, @@ -3942,7 +3910,7 @@ workflow run_wf { "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", "preseq_multiqc": "preseq_output", - "qualimap_multiqc": "qualimap_output_dir", + // "qualimap_multiqc": "qualimap_output_dir", "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "bamstat_multiqc": "bamstat_output", @@ -4042,8 +4010,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", @@ -4241,8 +4210,16 @@ if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -e "$VIASH_PAR_D ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE' does not exist." exit 1 fi -if [ ! -z "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && [ ! -e "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ]; then - ViashError "Output file '$VIASH_PAR_QUALIMAP_OUTPUT_DIR' does not exist." 
+if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_QC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_QC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -e "$VIASH_PAR_QUALIMAP_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_REPORT' does not exist." exit 1 fi if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT" ]; then diff --git a/target/executable/workflows/rnaseq/.config.vsh.yaml b/target/executable/workflows/rnaseq/.config.vsh.yaml index ba47302..4fc424a 100644 --- a/target/executable/workflows/rnaseq/.config.vsh.yaml +++ b/target/executable/workflows/rnaseq/.config.vsh.yaml @@ -237,24 +237,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_trimgalore_args" - description: "Extra arguments to pass to Trim Galore! command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--extra_fastp_args" - description: "Extra arguments to pass to fastp command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_trimmed_reads" description: "Minimum number of trimmed reads below which samples are removed\ @@ -271,16 +253,14 @@ argument_groups: arguments: - type: "file" name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". 
The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." info: null must_exist: true create_parent: true required: false direction: "input" - multiple: false + multiple: true multiple_sep: ";" - type: "file" name: "--bbsplit_index" @@ -437,7 +417,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length" description: "For single-end mode only, the estimated average fragment length\ \ to use for quantification with Kallisto." @@ -446,7 +426,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length_sd" description: "For single-end mode only, the estimated standard deviation of the\ \ fragment length for quantification with Kallisto." @@ -470,17 +450,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_salmon_quant_args" - description: "Extra arguments to pass to salmon quant command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - "-v" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_mapped_reads" description: "Minimum percentage of uniquely mapped reads below which samples\ @@ -530,18 +499,6 @@ argument_groups: description: "Skip all of the pseudo-alignment-based processes within the pipeline." info: null direction: "input" - - type: "string" - name: "--extra_rsem_calculate_expression_args" - description: "Extra arguments to pass to rsem-calculate-expression command in\ - \ addition to defaults defined by the pipeline." 
- info: null - default: - - "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed\ - \ 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Process skipping options" arguments: - type: "boolean" @@ -636,17 +593,6 @@ argument_groups: direction: "input" - name: "Other process arguments" arguments: - - type: "string" - name: "--extra_fq_subsample_args" - description: "Extra arguments to pass to fq subsample command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - " --record-count 1000000 --seed 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--extra_picard_args" description: "Extra arguments to pass to picard MarkDuplicates command in addition\ @@ -659,17 +605,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_bedtools_args" - description: "Extra arguments to pass to bedtools genomecov command in addition\ - \ to defaults defined by the pipeline." - info: null - default: - - " -split -du" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--extra_preseq_args" description: "Extra arguments to pass to preseq lc_extrap command in addition\ @@ -840,7 +775,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "fastq/$id.read_1.fastq.gz" + - "fastq/${id}_r1.fastq.gz" must_exist: false create_parent: true required: false @@ -852,7 +787,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "fastq/$id.read_2.fastq.gz" + - "fastq/${id}_r2.fastq.gz" must_exist: false create_parent: true required: false @@ -864,7 +799,7 @@ argument_groups: description: "FastQC HTML report for read 1." 
info: null default: - - "fastqc_raw/$id.read_1.fastqc.html" + - "fastqc_raw/${id}_r1.fastqc.html" must_exist: false create_parent: true required: false @@ -876,7 +811,7 @@ argument_groups: description: "FastQC HTML report for read 2." info: null default: - - "fastqc_raw/$id.read_2.fastqc.html" + - "fastqc_raw/${id}_r2.fastqc.html" must_exist: false create_parent: true required: false @@ -888,7 +823,7 @@ argument_groups: description: "FastQC report archive for read 1." info: null default: - - "fastqc_raw/$id.read_1.fastqc.zip" + - "fastqc_raw/${id}_r1.fastqc.zip" must_exist: false create_parent: true required: false @@ -900,7 +835,7 @@ argument_groups: description: "FastQC report archive for read 2." info: null default: - - "fastqc_raw/$id.read_2.fastqc.zip" + - "fastqc_raw/${id}_r2.fastqc.zip" must_exist: false create_parent: true required: false @@ -911,7 +846,7 @@ argument_groups: name: "--trim_html_1" info: null default: - - "fastqc_trim/$id.read_1.trimmed_fastqc.html" + - "fastqc_trim/${id}_r1.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -922,7 +857,7 @@ argument_groups: name: "--trim_html_2" info: null default: - - "fastqc_trim/$id.read_2.trimmed_fastqc.html" + - "fastqc_trim/${id}_r2.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -933,7 +868,7 @@ argument_groups: name: "--trim_zip_1" info: null default: - - "fastqc_trim/$id.read_1.trimmed_fastqc.zip" + - "fastqc_trim/${id}_r1.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -944,7 +879,7 @@ argument_groups: name: "--trim_zip_2" info: null default: - - "fastqc_trim/$id.read_2.trimmed_fastqc.zip" + - "fastqc_trim/${id}_r2.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -955,7 +890,7 @@ argument_groups: name: "--trim_log_1" info: null default: - - "trimgalore/$id.read_1.trimming_report.txt" + - "trimgalore/${id}_r1.trimming_report.txt" must_exist: false create_parent: true required: false @@ 
-966,7 +901,7 @@ argument_groups: name: "--trim_log_2" info: null default: - - "trimgalore/$id.read_2.trimming_report.txt" + - "trimgalore/${id}_r2.trimming_report.txt" must_exist: false create_parent: true required: false @@ -1833,21 +1768,35 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_pdf" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." info: null default: - - "qualimap/$id.qualimap_output.pdf" - must_exist: false + - "Qualimap/$id.rnaseq_qc_results.txt" + must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_dir" + name: "--qualimap_counts" + description: "Output file for computed counts." info: null default: - - "qualimap/$id" + - "Qualimap/$id.counts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." 
+ info: null + default: + - "Qualimap/$id.report.html" must_exist: true create_parent: true required: false @@ -2031,7 +1980,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -2112,8 +2061,8 @@ build_info: output: "target/executable/workflows/rnaseq" executable: "target/executable/workflows/rnaseq/rnaseq" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/workflows/prepare_genome" - "target/nextflow/cat_fastq" @@ -2132,7 +2081,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/executable/workflows/rnaseq/rnaseq b/target/executable/workflows/rnaseq/rnaseq index c35d2a6..fb6c439 100755 --- a/target/executable/workflows/rnaseq/rnaseq +++ b/target/executable/workflows/rnaseq/rnaseq @@ -283,16 +283,6 @@ function ViashHelp { echo " choices: [ trimgalore, fastp ]" echo " Specify the trimming tool to use." echo "" - echo " --extra_trimgalore_args" - echo " type: string" - echo " Extra arguments to pass to Trim Galore! command in addition to defaults" - echo " defined by the pipeline." - echo "" - echo " --extra_fastp_args" - echo " type: string" - echo " Extra arguments to pass to fastp command in addition to defaults defined" - echo " by the pipeline." 
- echo "" echo " --min_trimmed_reads" echo " type: integer" echo " default: 10000" @@ -302,11 +292,9 @@ function ViashHelp { echo "" echo "Read filtering options:" echo " --bbsplit_fasta_list" - echo " type: file, file must exist" - echo " Path to comma-separated file containing a list of reference genomes to" - echo " filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must" - echo " be explicitly set to \"false\". The file should contain 2 (comma" - echo " separated) columns - short name and full path to reference genome(s)" + echo " type: file, multiple values allowed, file must exist" + echo " List of reference genomes (separated by \";\") to filter reads against" + echo " with BBSplit." echo "" echo " --bbsplit_index" echo " type: file, file must exist" @@ -389,12 +377,12 @@ function ViashHelp { echo " Kmer length passed to indexing step of pseudoaligners." echo "" echo " --kallisto_quant_fragment_length" - echo " type: integer" + echo " type: double" echo " For single-end mode only, the estimated average fragment length to use" echo " for quantification with Kallisto." echo "" echo " --kallisto_quant_fragment_length_sd" - echo " type: integer" + echo " type: double" echo " For single-end mode only, the estimated standard deviation of the" echo " fragment length for quantification with Kallisto." echo "" @@ -408,12 +396,6 @@ function ViashHelp { echo " Override Salmon library type inferred based on strandedness defined in" echo " meta object." echo "" - echo " --extra_salmon_quant_args" - echo " type: string" - echo " default: -v" - echo " Extra arguments to pass to salmon quant command in addition to defaults" - echo " defined by the pipeline." - echo "" echo " --min_mapped_reads" echo " type: integer" echo " default: 5" @@ -448,13 +430,6 @@ function ViashHelp { echo " type: boolean_true" echo " Skip all of the pseudo-alignment-based processes within the pipeline." 
echo "" - echo " --extra_rsem_calculate_expression_args" - echo " type: string" - echo " default: --star --star-output-genome-bam --star-gzipped-read-file" - echo "--estimate-rspd --seed 1" - echo " Extra arguments to pass to rsem-calculate-expression command in addition" - echo " to defaults defined by the pipeline." - echo "" echo "Process skipping options:" echo " --skip_fastqc" echo " type: boolean" @@ -520,12 +495,6 @@ function ViashHelp { echo " Skip MultiQC." echo "" echo "Other process arguments:" - echo " --extra_fq_subsample_args" - echo " type: string" - echo " default: --record-count 1000000 --seed 1" - echo " Extra arguments to pass to fq subsample command in addition to defaults" - echo " defined by the pipeline." - echo "" echo " --extra_picard_args" echo " type: string" echo " default: --ASSUME_SORTED true --REMOVE_DUPLICATES false" @@ -533,12 +502,6 @@ function ViashHelp { echo " Extra arguments to pass to picard MarkDuplicates command in addition to" echo " defaults defined by the pipeline." echo "" - echo " --extra_bedtools_args" - echo " type: string" - echo " default: -split -du" - echo " Extra arguments to pass to bedtools genomecov command in addition to" - echo " defaults defined by the pipeline." - echo "" echo " --extra_preseq_args" echo " type: string" echo " default: -verbose -seed 1 -seg_len 100000000" @@ -610,57 +573,57 @@ function ViashHelp { echo "" echo " --output_fastq_1" echo " type: file, output" - echo " default: fastq/\$id.read_1.fastq.gz" + echo " default: fastq/\${id}_r1.fastq.gz" echo " Path to output directory" echo "" echo " --output_fastq_2" echo " type: file, output" - echo " default: fastq/\$id.read_2.fastq.gz" + echo " default: fastq/\${id}_r2.fastq.gz" echo " Path to output directory" echo "" echo " --fastqc_html_1" echo " type: file, output" - echo " default: fastqc_raw/\$id.read_1.fastqc.html" + echo " default: fastqc_raw/\${id}_r1.fastqc.html" echo " FastQC HTML report for read 1." 
echo "" echo " --fastqc_html_2" echo " type: file, output" - echo " default: fastqc_raw/\$id.read_2.fastqc.html" + echo " default: fastqc_raw/\${id}_r2.fastqc.html" echo " FastQC HTML report for read 2." echo "" echo " --fastqc_zip_1" echo " type: file, output" - echo " default: fastqc_raw/\$id.read_1.fastqc.zip" + echo " default: fastqc_raw/\${id}_r1.fastqc.zip" echo " FastQC report archive for read 1." echo "" echo " --fastqc_zip_2" echo " type: file, output" - echo " default: fastqc_raw/\$id.read_2.fastqc.zip" + echo " default: fastqc_raw/\${id}_r2.fastqc.zip" echo " FastQC report archive for read 2." echo "" echo " --trim_html_1" echo " type: file, output" - echo " default: fastqc_trim/\$id.read_1.trimmed_fastqc.html" + echo " default: fastqc_trim/\${id}_r1.trimmed_fastqc.html" echo "" echo " --trim_html_2" echo " type: file, output" - echo " default: fastqc_trim/\$id.read_2.trimmed_fastqc.html" + echo " default: fastqc_trim/\${id}_r2.trimmed_fastqc.html" echo "" echo " --trim_zip_1" echo " type: file, output" - echo " default: fastqc_trim/\$id.read_1.trimmed_fastqc.zip" + echo " default: fastqc_trim/\${id}_r1.trimmed_fastqc.zip" echo "" echo " --trim_zip_2" echo " type: file, output" - echo " default: fastqc_trim/\$id.read_2.trimmed_fastqc.zip" + echo " default: fastqc_trim/\${id}_r2.trimmed_fastqc.zip" echo "" echo " --trim_log_1" echo " type: file, output" - echo " default: trimgalore/\$id.read_1.trimming_report.txt" + echo " default: trimgalore/\${id}_r1.trimming_report.txt" echo "" echo " --trim_log_2" echo " type: file, output" - echo " default: trimgalore/\$id.read_2.trimming_report.txt" + echo " default: trimgalore/\${id}_r2.trimming_report.txt" echo "" echo " --fastp_trim_json" echo " type: file, output, file must exist" @@ -1004,13 +967,20 @@ function ViashHelp { echo " type: file, output, file must exist" echo " default: dupradar/intercept_slope/\$id.intercept_slope.txt" echo "" - echo " --qualimap_output_pdf" - echo " type: file, output" - echo " 
default: qualimap/\$id.qualimap_output.pdf" - echo "" - echo " --qualimap_output_dir" + echo " --qualimap_qc_report" echo " type: file, output, file must exist" - echo " default: qualimap/\$id" + echo " default: Qualimap/\$id.rnaseq_qc_results.txt" + echo " Text file containing the RNAseq QC results." + echo "" + echo " --qualimap_counts" + echo " type: file, output, file must exist" + echo " default: Qualimap/\$id.counts.txt" + echo " Output file for computed counts." + echo "" + echo " --qualimap_report" + echo " type: file, output, file must exist" + echo " default: Qualimap/\$id.report.html" + echo " Report output file. Supported formats are PDF or HTML." echo "" echo " --deseq2_output" echo " type: file, output, file must exist" @@ -1342,28 +1312,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_TRIMMER=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_trimgalore_args) - [ -n "$VIASH_PAR_EXTRA_TRIMGALORE_ARGS" ] && ViashError Bad arguments for option \'--extra_trimgalore_args\': \'$VIASH_PAR_EXTRA_TRIMGALORE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_TRIMGALORE_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_trimgalore_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_trimgalore_args=*) - [ -n "$VIASH_PAR_EXTRA_TRIMGALORE_ARGS" ] && ViashError Bad arguments for option \'--extra_trimgalore_args=*\': \'$VIASH_PAR_EXTRA_TRIMGALORE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_TRIMGALORE_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --extra_fastp_args) - [ -n "$VIASH_PAR_EXTRA_FASTP_ARGS" ] && ViashError Bad arguments for option \'--extra_fastp_args\': \'$VIASH_PAR_EXTRA_FASTP_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FASTP_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_fastp_args. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_fastp_args=*) - [ -n "$VIASH_PAR_EXTRA_FASTP_ARGS" ] && ViashError Bad arguments for option \'--extra_fastp_args=*\': \'$VIASH_PAR_EXTRA_FASTP_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FASTP_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --min_trimmed_reads) [ -n "$VIASH_PAR_MIN_TRIMMED_READS" ] && ViashError Bad arguments for option \'--min_trimmed_reads\': \'$VIASH_PAR_MIN_TRIMMED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_MIN_TRIMMED_READS="$2" @@ -1376,14 +1324,20 @@ while [[ $# -gt 0 ]]; do shift 1 ;; --bbsplit_fasta_list) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST="$2" + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;""$2" + fi [ $# -lt 2 ] && ViashError Not enough arguments passed to --bbsplit_fasta_list. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; --bbsplit_fasta_list=*) - [ -n "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && ViashError Bad arguments for option \'--bbsplit_fasta_list=*\': \'$VIASH_PAR_BBSPLIT_FASTA_LIST\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + if [ -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + VIASH_PAR_BBSPLIT_FASTA_LIST=$(ViashRemoveFlags "$1") + else + VIASH_PAR_BBSPLIT_FASTA_LIST="$VIASH_PAR_BBSPLIT_FASTA_LIST;"$(ViashRemoveFlags "$1") + fi shift 1 ;; --bbsplit_index) @@ -1560,17 +1514,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_SALMON_QUANT_LIBTYPE=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_salmon_quant_args) - [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_salmon_quant_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_salmon_quant_args=*) - [ -n "$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS" ] && ViashError Bad arguments for option \'--extra_salmon_quant_args=*\': \'$VIASH_PAR_EXTRA_SALMON_QUANT_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --min_mapped_reads) [ -n "$VIASH_PAR_MIN_MAPPED_READS" ] && ViashError Bad arguments for option \'--min_mapped_reads\': \'$VIASH_PAR_MIN_MAPPED_READS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_MIN_MAPPED_READS="$2" @@ -1618,17 +1561,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_SKIP_PSEUDO_ALIGNMENT=true shift 1 ;; - --extra_rsem_calculate_expression_args) - [ -n "$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS" ] && ViashError Bad arguments for option \'--extra_rsem_calculate_expression_args\': \'$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_rsem_calculate_expression_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_rsem_calculate_expression_args=*) - [ -n "$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS" ] && ViashError Bad arguments for option \'--extra_rsem_calculate_expression_args=*\': \'$VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --skip_fastqc) [ -n "$VIASH_PAR_SKIP_FASTQC" ] && ViashError Bad arguments for option \'--skip_fastqc\': \'$VIASH_PAR_SKIP_FASTQC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_SKIP_FASTQC="$2" @@ -1722,17 +1654,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_SKIP_MULTIQC=true shift 1 ;; - --extra_fq_subsample_args) - [ -n "$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS" ] && ViashError Bad arguments for option \'--extra_fq_subsample_args\': \'$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_fq_subsample_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_fq_subsample_args=*) - [ -n "$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS" ] && ViashError Bad arguments for option \'--extra_fq_subsample_args=*\': \'$VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --extra_picard_args) [ -n "$VIASH_PAR_EXTRA_PICARD_ARGS" ] && ViashError Bad arguments for option \'--extra_picard_args\': \'$VIASH_PAR_EXTRA_PICARD_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_EXTRA_PICARD_ARGS="$2" @@ -1744,17 +1665,6 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_EXTRA_PICARD_ARGS=$(ViashRemoveFlags "$1") shift 1 ;; - --extra_bedtools_args) - [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_BEDTOOLS_ARGS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --extra_bedtools_args. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --extra_bedtools_args=*) - [ -n "$VIASH_PAR_EXTRA_BEDTOOLS_ARGS" ] && ViashError Bad arguments for option \'--extra_bedtools_args=*\': \'$VIASH_PAR_EXTRA_BEDTOOLS_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_EXTRA_BEDTOOLS_ARGS=$(ViashRemoveFlags "$1") - shift 1 - ;; --extra_preseq_args) [ -n "$VIASH_PAR_EXTRA_PRESEQ_ARGS" ] && ViashError Bad arguments for option \'--extra_preseq_args\': \'$VIASH_PAR_EXTRA_PRESEQ_ARGS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 VIASH_PAR_EXTRA_PRESEQ_ARGS="$2" @@ -2861,26 +2771,37 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE=$(ViashRemoveFlags "$1") shift 1 ;; - --qualimap_output_pdf) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--qualimap_output_pdf\': \'$VIASH_PAR_QUALIMAP_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_PDF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_output_pdf. 
Use "--help" to get more information on the parameters. && exit 1 + --qualimap_qc_report) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_qc_report. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; - --qualimap_output_pdf=*) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && ViashError Bad arguments for option \'--qualimap_output_pdf=*\': \'$VIASH_PAR_QUALIMAP_OUTPUT_PDF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_PDF=$(ViashRemoveFlags "$1") + --qualimap_qc_report=*) + [ -n "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && ViashError Bad arguments for option \'--qualimap_qc_report=*\': \'$VIASH_PAR_QUALIMAP_QC_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_QC_REPORT=$(ViashRemoveFlags "$1") shift 1 ;; - --qualimap_output_dir) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--qualimap_output_dir\': \'$VIASH_PAR_QUALIMAP_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_DIR="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_output_dir. Use "--help" to get more information on the parameters. && exit 1 + --qualimap_counts) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_counts. Use "--help" to get more information on the parameters. 
&& exit 1 shift 2 ;; - --qualimap_output_dir=*) - [ -n "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && ViashError Bad arguments for option \'--qualimap_output_dir=*\': \'$VIASH_PAR_QUALIMAP_OUTPUT_DIR\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_QUALIMAP_OUTPUT_DIR=$(ViashRemoveFlags "$1") + --qualimap_counts=*) + [ -n "$VIASH_PAR_QUALIMAP_COUNTS" ] && ViashError Bad arguments for option \'--qualimap_counts=*\': \'$VIASH_PAR_QUALIMAP_COUNTS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_COUNTS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --qualimap_report) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_QUALIMAP_REPORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --qualimap_report. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --qualimap_report=*) + [ -n "$VIASH_PAR_QUALIMAP_REPORT" ] && ViashError Bad arguments for option \'--qualimap_report=*\': \'$VIASH_PAR_QUALIMAP_REPORT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_QUALIMAP_REPORT=$(ViashRemoveFlags "$1") shift 1 ;; --deseq2_output) @@ -3227,9 +3148,6 @@ fi if [ -z ${VIASH_PAR_BAM_CSI_INDEX+x} ]; then VIASH_PAR_BAM_CSI_INDEX="false" fi -if [ -z ${VIASH_PAR_EXTRA_SALMON_QUANT_ARGS+x} ]; then - VIASH_PAR_EXTRA_SALMON_QUANT_ARGS="-v" -fi if [ -z ${VIASH_PAR_MIN_MAPPED_READS+x} ]; then VIASH_PAR_MIN_MAPPED_READS="5" fi @@ -3251,9 +3169,6 @@ fi if [ -z ${VIASH_PAR_SKIP_PSEUDO_ALIGNMENT+x} ]; then VIASH_PAR_SKIP_PSEUDO_ALIGNMENT="false" fi -if [ -z ${VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS+x} ]; then - VIASH_PAR_EXTRA_RSEM_CALCULATE_EXPRESSION_ARGS="--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" -fi if [ -z ${VIASH_PAR_SKIP_FASTQC+x} ]; then VIASH_PAR_SKIP_FASTQC="false" fi @@ -3299,15 +3214,9 @@ fi if [ -z ${VIASH_PAR_SKIP_MULTIQC+x} ]; then VIASH_PAR_SKIP_MULTIQC="false" fi -if [ -z ${VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS+x} ]; then - VIASH_PAR_EXTRA_FQ_SUBSAMPLE_ARGS=" --record-count 1000000 --seed 1" -fi if [ -z ${VIASH_PAR_EXTRA_PICARD_ARGS+x} ]; then VIASH_PAR_EXTRA_PICARD_ARGS=" --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" fi -if [ -z ${VIASH_PAR_EXTRA_BEDTOOLS_ARGS+x} ]; then - VIASH_PAR_EXTRA_BEDTOOLS_ARGS=" -split -du" -fi if [ -z ${VIASH_PAR_EXTRA_PRESEQ_ARGS+x} ]; then VIASH_PAR_EXTRA_PRESEQ_ARGS="-verbose -seed 1 -seg_len 100000000" fi @@ -3348,40 +3257,40 @@ if [ -z ${VIASH_PAR_OUTPUT_KALLISTO_INDEX+x} ]; then VIASH_PAR_OUTPUT_KALLISTO_INDEX="reference/index/Kallisto" fi if [ -z ${VIASH_PAR_OUTPUT_FASTQ_1+x} ]; then - VIASH_PAR_OUTPUT_FASTQ_1="fastq/\$id.read_1.fastq.gz" + VIASH_PAR_OUTPUT_FASTQ_1="fastq/\${id}_r1.fastq.gz" fi if [ -z ${VIASH_PAR_OUTPUT_FASTQ_2+x} ]; then - VIASH_PAR_OUTPUT_FASTQ_2="fastq/\$id.read_2.fastq.gz" + VIASH_PAR_OUTPUT_FASTQ_2="fastq/\${id}_r2.fastq.gz" fi if [ -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then - VIASH_PAR_FASTQC_HTML_1="fastqc_raw/\$id.read_1.fastqc.html" + 
VIASH_PAR_FASTQC_HTML_1="fastqc_raw/\${id}_r1.fastqc.html" fi if [ -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then - VIASH_PAR_FASTQC_HTML_2="fastqc_raw/\$id.read_2.fastqc.html" + VIASH_PAR_FASTQC_HTML_2="fastqc_raw/\${id}_r2.fastqc.html" fi if [ -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then - VIASH_PAR_FASTQC_ZIP_1="fastqc_raw/\$id.read_1.fastqc.zip" + VIASH_PAR_FASTQC_ZIP_1="fastqc_raw/\${id}_r1.fastqc.zip" fi if [ -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then - VIASH_PAR_FASTQC_ZIP_2="fastqc_raw/\$id.read_2.fastqc.zip" + VIASH_PAR_FASTQC_ZIP_2="fastqc_raw/\${id}_r2.fastqc.zip" fi if [ -z ${VIASH_PAR_TRIM_HTML_1+x} ]; then - VIASH_PAR_TRIM_HTML_1="fastqc_trim/\$id.read_1.trimmed_fastqc.html" + VIASH_PAR_TRIM_HTML_1="fastqc_trim/\${id}_r1.trimmed_fastqc.html" fi if [ -z ${VIASH_PAR_TRIM_HTML_2+x} ]; then - VIASH_PAR_TRIM_HTML_2="fastqc_trim/\$id.read_2.trimmed_fastqc.html" + VIASH_PAR_TRIM_HTML_2="fastqc_trim/\${id}_r2.trimmed_fastqc.html" fi if [ -z ${VIASH_PAR_TRIM_ZIP_1+x} ]; then - VIASH_PAR_TRIM_ZIP_1="fastqc_trim/\$id.read_1.trimmed_fastqc.zip" + VIASH_PAR_TRIM_ZIP_1="fastqc_trim/\${id}_r1.trimmed_fastqc.zip" fi if [ -z ${VIASH_PAR_TRIM_ZIP_2+x} ]; then - VIASH_PAR_TRIM_ZIP_2="fastqc_trim/\$id.read_2.trimmed_fastqc.zip" + VIASH_PAR_TRIM_ZIP_2="fastqc_trim/\${id}_r2.trimmed_fastqc.zip" fi if [ -z ${VIASH_PAR_TRIM_LOG_1+x} ]; then - VIASH_PAR_TRIM_LOG_1="trimgalore/\$id.read_1.trimming_report.txt" + VIASH_PAR_TRIM_LOG_1="trimgalore/\${id}_r1.trimming_report.txt" fi if [ -z ${VIASH_PAR_TRIM_LOG_2+x} ]; then - VIASH_PAR_TRIM_LOG_2="trimgalore/\$id.read_2.trimming_report.txt" + VIASH_PAR_TRIM_LOG_2="trimgalore/\${id}_r2.trimming_report.txt" fi if [ -z ${VIASH_PAR_FASTP_TRIM_JSON+x} ]; then VIASH_PAR_FASTP_TRIM_JSON="fastp/\$id_out.json" @@ -3605,11 +3514,14 @@ fi if [ -z ${VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE+x} ]; then VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE="dupradar/intercept_slope/\$id.intercept_slope.txt" fi -if [ -z ${VIASH_PAR_QUALIMAP_OUTPUT_PDF+x} ]; then - 
VIASH_PAR_QUALIMAP_OUTPUT_PDF="qualimap/\$id.qualimap_output.pdf" +if [ -z ${VIASH_PAR_QUALIMAP_QC_REPORT+x} ]; then + VIASH_PAR_QUALIMAP_QC_REPORT="Qualimap/\$id.rnaseq_qc_results.txt" fi -if [ -z ${VIASH_PAR_QUALIMAP_OUTPUT_DIR+x} ]; then - VIASH_PAR_QUALIMAP_OUTPUT_DIR="qualimap/\$id" +if [ -z ${VIASH_PAR_QUALIMAP_COUNTS+x} ]; then + VIASH_PAR_QUALIMAP_COUNTS="Qualimap/\$id.counts.txt" +fi +if [ -z ${VIASH_PAR_QUALIMAP_REPORT+x} ]; then + VIASH_PAR_QUALIMAP_REPORT="Qualimap/\$id.report.html" fi if [ -z ${VIASH_PAR_DESEQ2_OUTPUT+x} ]; then VIASH_PAR_DESEQ2_OUTPUT="deseq2_qc" @@ -3705,9 +3617,17 @@ if [ ! -z "$VIASH_PAR_KALLISTO_INDEX" ] && [ ! -e "$VIASH_PAR_KALLISTO_INDEX" ]; ViashError "Input file '$VIASH_PAR_KALLISTO_INDEX' does not exist." exit 1 fi -if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ] && [ ! -e "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then - ViashError "Input file '$VIASH_PAR_BBSPLIT_FASTA_LIST' does not exist." - exit 1 +if [ ! -z "$VIASH_PAR_BBSPLIT_FASTA_LIST" ]; then + IFS=';' + set -f + for file in $VIASH_PAR_BBSPLIT_FASTA_LIST; do + unset IFS + if [ ! -e "$file" ]; then + ViashError "Input file '$file' does not exist." + exit 1 + fi + done + set +f fi if [ ! -z "$VIASH_PAR_BBSPLIT_INDEX" ] && [ ! -e "$VIASH_PAR_BBSPLIT_INDEX" ]; then ViashError "Input file '$VIASH_PAR_BBSPLIT_INDEX' does not exist." @@ -3770,14 +3690,14 @@ if [[ -n "$VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE" ]]; then fi fi if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" ]]; then - if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--kallisto_quant_fragment_length' has to be an integer. Use "--help" to get more information on the parameters. + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length' has to be a double. Use "--help" to get more information on the parameters. 
exit 1 fi fi if [[ -n "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" ]]; then - if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--kallisto_quant_fragment_length_sd' has to be an integer. Use "--help" to get more information on the parameters. + if ! [[ "$VIASH_PAR_KALLISTO_QUANT_FRAGMENT_LENGTH_SD" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--kallisto_quant_fragment_length_sd' has to be a double. Use "--help" to get more information on the parameters. exit 1 fi fi @@ -4374,11 +4294,14 @@ fi if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -d "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" ]; then mkdir -p "$(dirname "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE")" fi -if [ ! -z "$VIASH_PAR_QUALIMAP_OUTPUT_PDF" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_PDF")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_PDF")" +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_QC_REPORT")" fi -if [ ! -z "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_DIR")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_OUTPUT_DIR")" +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_COUNTS")" +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -d "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_QUALIMAP_REPORT")" fi if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" ]; then mkdir -p "$(dirname "$VIASH_PAR_DESEQ2_OUTPUT")" @@ -4878,8 +4801,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", @@ -4993,8 +4917,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "tpm_gene": "tpm_gene", "counts_gene": "counts_gene", "counts_gene_length_scaled": "counts_gene_length_scaled", @@ -5418,8 +5343,16 @@ if [ ! -z "$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE" ] && [ ! -e "$VIASH_PAR_D ViashError "Output file '$VIASH_PAR_DUPRADAR_OUTPUT_INTERCEPT_SLOPE' does not exist." exit 1 fi -if [ ! -z "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ] && [ ! -e "$VIASH_PAR_QUALIMAP_OUTPUT_DIR" ]; then - ViashError "Output file '$VIASH_PAR_QUALIMAP_OUTPUT_DIR' does not exist." +if [ ! -z "$VIASH_PAR_QUALIMAP_QC_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_QC_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_QC_REPORT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_COUNTS" ] && [ ! 
-e "$VIASH_PAR_QUALIMAP_COUNTS" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_COUNTS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_QUALIMAP_REPORT" ] && [ ! -e "$VIASH_PAR_QUALIMAP_REPORT" ]; then + ViashError "Output file '$VIASH_PAR_QUALIMAP_REPORT' does not exist." exit 1 fi if [ ! -z "$VIASH_PAR_DESEQ2_OUTPUT" ] && [ ! -e "$VIASH_PAR_DESEQ2_OUTPUT" ]; then diff --git a/target/nextflow/bbmap_bbsplit/.config.vsh.yaml b/target/nextflow/bbmap_bbsplit/.config.vsh.yaml deleted file mode 100644 index faf328e..0000000 --- a/target/nextflow/bbmap_bbsplit/.config.vsh.yaml +++ /dev/null @@ -1,268 +0,0 @@ -name: "bbmap_bbsplit" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "string" - name: "--id" - description: "Sample ID" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: "," - - type: "file" - name: "--primary_ref" - description: "Primary reference FASTA" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit." 
- info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--only_build_index" - description: "true = only build index; false = mapping" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--built_bbsplit_index" - description: "Directory with index files" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--fastq_1" - description: "Output file for read 1." - info: null - default: - - "$id.$key.read_1.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastq_2" - description: "Output file for read 2." - info: null - default: - - "$id.$key.read_2.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bbsplit_index" - description: "Directory with index files" - info: null - default: - - "BBSplit_index" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Split sequencing reads by mapping them to multiple references simultaneously.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "genome.fasta" -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -- type: "file" - path: "sarscov2.fa" -- type: "file" - path: "human.fa" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/bbmap/bbsplit/main.nf" - - "modules/nf-core/bbmap/bbsplit/meta.yml" - last_sha: 
"277bd337739a8b8f753fa7b5eda6743b9b6acb89" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: 
"cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\ - \ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\ - \ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/bbmap_bbsplit/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/bbmap_bbsplit" - executable: "target/nextflow/bbmap_bbsplit/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/bedtools_genomecov/.config.vsh.yaml b/target/nextflow/bedtools_genomecov/.config.vsh.yaml index 6e5d619..8cd391c 100644 --- 
a/target/nextflow/bedtools_genomecov/.config.vsh.yaml +++ b/target/nextflow/bedtools_genomecov/.config.vsh.yaml @@ -83,7 +83,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -178,8 +178,8 @@ build_info: output: "target/nextflow/bedtools_genomecov" executable: "target/nextflow/bedtools_genomecov/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -190,7 +190,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/bedtools_genomecov/main.nf b/target/nextflow/bedtools_genomecov/main.nf index 1379d70..ab3bf7f 100644 --- a/target/nextflow/bedtools_genomecov/main.nf +++ b/target/nextflow/bedtools_genomecov/main.nf @@ -2919,7 +2919,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3031,8 +3031,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/bedtools_genomecov", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3049,7 +3049,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { 
diff --git a/target/nextflow/cat_additional_fasta/.config.vsh.yaml b/target/nextflow/cat_additional_fasta/.config.vsh.yaml index 441686f..3f1b884 100644 --- a/target/nextflow/cat_additional_fasta/.config.vsh.yaml +++ b/target/nextflow/cat_additional_fasta/.config.vsh.yaml @@ -93,7 +93,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -182,8 +182,8 @@ build_info: output: "target/nextflow/cat_additional_fasta" executable: "target/nextflow/cat_additional_fasta/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -194,7 +194,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/cat_additional_fasta/main.nf b/target/nextflow/cat_additional_fasta/main.nf index 64bce87..c1c615b 100644 --- a/target/nextflow/cat_additional_fasta/main.nf +++ b/target/nextflow/cat_additional_fasta/main.nf @@ -2928,7 +2928,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3032,8 +3032,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/cat_additional_fasta", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" 
}, "package_config" : { "name" : "rnaseq", @@ -3050,7 +3050,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/cat_fastq/.config.vsh.yaml b/target/nextflow/cat_fastq/.config.vsh.yaml index 873a150..9bfbc3e 100644 --- a/target/nextflow/cat_fastq/.config.vsh.yaml +++ b/target/nextflow/cat_fastq/.config.vsh.yaml @@ -80,7 +80,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -169,8 +169,8 @@ build_info: output: "target/nextflow/cat_fastq" executable: "target/nextflow/cat_fastq/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -181,7 +181,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/cat_fastq/main.nf b/target/nextflow/cat_fastq/main.nf index 78c1104..f1d8e66 100644 --- a/target/nextflow/cat_fastq/main.nf +++ b/target/nextflow/cat_fastq/main.nf @@ -2919,7 +2919,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3023,8 +3023,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/cat_fastq", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : 
"https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3041,7 +3041,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/deseq2_qc/.config.vsh.yaml b/target/nextflow/deseq2_qc/.config.vsh.yaml index a45a81d..92c91ee 100644 --- a/target/nextflow/deseq2_qc/.config.vsh.yaml +++ b/target/nextflow/deseq2_qc/.config.vsh.yaml @@ -136,7 +136,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -237,8 +237,8 @@ build_info: output: "target/nextflow/deseq2_qc" executable: "target/nextflow/deseq2_qc/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -249,7 +249,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/deseq2_qc/main.nf b/target/nextflow/deseq2_qc/main.nf index ccbcdac..53ea4ea 100644 --- a/target/nextflow/deseq2_qc/main.nf +++ b/target/nextflow/deseq2_qc/main.nf @@ -2983,7 +2983,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3104,8 +3104,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/deseq2_qc", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : 
"0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3122,7 +3122,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/dupradar/.config.vsh.yaml b/target/nextflow/dupradar/.config.vsh.yaml index 540280b..90ff245 100644 --- a/target/nextflow/dupradar/.config.vsh.yaml +++ b/target/nextflow/dupradar/.config.vsh.yaml @@ -168,7 +168,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -266,8 +266,8 @@ build_info: output: "target/nextflow/dupradar" executable: "target/nextflow/dupradar/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -278,7 +278,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/dupradar/main.nf b/target/nextflow/dupradar/main.nf index df68edd..1471604 100644 --- a/target/nextflow/dupradar/main.nf +++ b/target/nextflow/dupradar/main.nf @@ -3016,7 +3016,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3136,8 +3136,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/dupradar", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3154,7 +3154,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/fastqc/.config.vsh.yaml b/target/nextflow/fastqc/.config.vsh.yaml deleted file mode 100644 index aae4bd1..0000000 --- a/target/nextflow/fastqc/.config.vsh.yaml +++ /dev/null @@ -1,228 +0,0 @@ -name: "fastqc" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," -- name: "Output" - arguments: - - type: "file" - name: "--fastqc_html_1" - description: "FastQC HTML report for read 1." - info: null - default: - - "$id.read_1.fastqc.html" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_html_2" - description: "FastQC HTML report for read 2." - info: null - default: - - "$id.read_2.fastqc.html" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_zip_1" - description: "FastQC report archive for read 1." 
- info: null - default: - - "$id.read_1.fastqc.zip" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastqc_zip_2" - description: "FastQC report archive for read 2." - info: null - default: - - "$id.read_2.fastqc.zip" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/.\ - \ This component can take one or more files (by means of shell globbing) or a complete\ - \ directory.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/fastqc/main.nf" - - "modules/nf-core/fastqc/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 
2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "fastqc" - interactive: false - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/fastqc/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/fastqc" - executable: "target/nextflow/fastqc/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: 
"rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/fastqc/main.nf b/target/nextflow/fastqc/main.nf deleted file mode 100644 index 5832da6..0000000 --- a/target/nextflow/fastqc/main.nf +++ /dev/null @@ -1,3659 +0,0 @@ -// fastqc main -// -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' -class UnexpectedArgumentTypeException extends Exception { - String errorIdentifier - String stage - String plainName - String expectedClass - String foundClass - - // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} - UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { - super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. 
" + - "Expected type: ${expectedClass}. Found type: ${foundClass}") - this.errorIdentifier = errorIdentifier - this.stage = stage - this.plainName = plainName - this.expectedClass = expectedClass - this.foundClass = foundClass - } -} - -/** - * Checks if the given value is of the expected type. If not, an exception is thrown. - * - * @param stage The stage of the argument (input or output) - * @param par The parameter definition - * @param value The value to check - * @param errorIdentifier The identifier to use in the error message - * @return The value, if it is of the expected type - * @throws UnexpectedArgumentTypeException If the value is not of the expected type -*/ -def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { - // expectedClass will only be != null if value is not of the expected type - def expectedClass = null - def foundClass = null - - // todo: split if need be - - if (!par.required && value == null) { - expectedClass = null - } else if (par.multiple) { - if (value !instanceof Collection) { - value = [value] - } - - // split strings - value = value.collectMany{ val -> - if (val instanceof String) { - // collect() to ensure that the result is a List and not simply an array - val.split(par.multiple_sep).collect() - } else { - [val] - } - } - - // process globs - if (par.type == "file" && par.direction == "input") { - value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() - } - - // check types of elements in list - try { - value = value.collect { listVal -> - _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) - } - } catch (UnexpectedArgumentTypeException e) { - expectedClass = "List[${e.expectedClass}]" - foundClass = "List[${e.foundClass}]" - } - } else if (par.type == "string") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? 
null : "String" - } else if (par.type == "integer") { - // cast to integer if need be - if (value instanceof String) { - try { - value = value.toInteger() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? null : "Integer" - } else if (par.type == "long") { - // cast to long if need be - if (value instanceof String) { - try { - value = value.toLong() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" - } else if (par.type == "double") { - // cast to double if need be - if (value instanceof String) { - try { - value = value.toDouble() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() - } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" - } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { - // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false - } - } - expectedClass = value instanceof Boolean ? null : "Boolean" - } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { - // cast to path if need be - if (value instanceof String) { - value = file(value, hidden: true) - } - if (value instanceof File) { - value = value.toPath() - } - expectedClass = value instanceof Path ? null : "Path" - } else if (par.type == "file" && stage == "input" && par.direction == "output") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? 
null : "String" - } else { - // didn't find a match for par.type - expectedClass = par.type - } - - if (expectedClass != null) { - if (foundClass == null) { - foundClass = value.getClass().getName() - } - throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) - } - - return value -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' -Map _processInputValues(Map inputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.required) { - assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" - } - } - - inputs = inputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" - - value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") - - [ name, value ] - } - } - return inputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - - outputs = outputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && it.direction == "output" } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" - - value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") - - [ name, value ] 
- } - } - return outputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' -class IDChecker { - final def items = [] as Set - - @groovy.transform.WithWriteLock - boolean observe(String item) { - if (items.contains(item)) { - return false - } else { - items << item - return true - } - } - - @groovy.transform.WithReadLock - boolean contains(String item) { - return items.contains(item) - } - - @groovy.transform.WithReadLock - Set getItems() { - return items.clone() - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. - */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' - -// helper functions for reading params from file // -def _getChild(parent, child) { - if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' -/** - * Figure out the param list format based on the file extension - * - * @param param_list A String containing the path to the parameter list file. - * - * @return A String containing the format of the parameter list file. 
- */ -def _paramListGuessFormat(param_list) { - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } -} - - -/** - * Read the param list - * - * @param param_list One of the following: - * - A String containing the path to the parameter list file (csv, json or yaml), - * - A yaml blob of a list of maps (yaml_blob), - * - Or a groovy list of maps (asis). - * @param config A Map of the Viash configuration. - * - * @return A List of Maps containing the parameters. - */ -def _parseParamList(param_list, Map config) { - // first determine format by extension - def paramListFormat = _paramListGuessFormat(param_list) - - def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? - file(param_list, hidden: true) : - null - - // get the correct parser function for the detected params_list format - def paramSets = [] - if (paramListFormat == "asis") { - paramSets = param_list - } else if (paramListFormat == "yaml_blob") { - paramSets = readYamlBlob(param_list) - } else if (paramListFormat == "yaml") { - paramSets = readYaml(paramListPath) - } else if (paramListFormat == "json") { - paramSets = readJson(paramListPath) - } else if (paramListFormat == "csv") { - paramSets = readCsv(paramListPath) - } else { - error "Format of provided --param_list not recognised.\n" + - "Found: '$paramListFormat'.\n" + - "Expected: a csv file, a json file, a yaml file,\n" + - "a yaml blob or a groovy list of maps." 
- } - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // id is argument - def idIsArgument = config.allArguments.any{it.plainName == "id"} - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ data -> - def id = data.id - if (!idIsArgument) { - data = data.findAll{k, v -> k != "id"} - } - [id, data] - }) - - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, data -> - data = _splitParams(data, config) - [id, data] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListPath) { - paramSets = paramSets.collect({ id, data -> - def new_data = data.collectEntries{ parName, parValue -> - def par = config.allArguments.find{it.plainName == parName} - if (par && par.type == "file" && par.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collectMany{path -> - def x = _resolveSiblingIfNotAbsolute(path, paramListPath) - x instanceof Collection ? x : [x] - } - } else { - parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) - } - } - [parName, parValue] - } - [id, new_data] - }) - } - - return paramSets -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. 
- */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. 
A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. - * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - // todo: fetch key from run args - def key_ = config.name - - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - - /* process params_list arguments */ - /*********************************/ - def paramList = params.containsKey("param_list") && params.param_list != null ? 
- params.param_list : [] - // if (paramList instanceof String) { - // paramList = [paramList] - // } - // def paramSets = paramList.collectMany{ _parseParamList(it, config) } - // TODO: be able to process param_list when it is a list of strings - def paramSets = _parseParamList(paramList, config) - if (paramSets.isEmpty()) { - paramSets = [[null, [:]]] - } - - /* combine arguments into channel */ - /**********************************/ - def processedParams = paramSets.indexed().collect{ index, tup -> - // Process ID - def id = tup[0] ?: globalID - - if (workflow.stubRun && !id) { - // if stub run, explicitly add an id if missing - id = "stub${index}" - } - assert id != null: "Each parameter set should have at least an 'id'" - - // Process params - def parValues = globalParams + tup[1] - // // Remove parameters which are null, if the default is also null - // parValues = parValues.collectEntries{paramName, paramValue -> - // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - // if ( paramValue != null || parameterSettings.get("default", null) != null ) { - // [paramName, paramValue] - // } - // } - parValues = parValues.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" - - if (par == null) { - return [:] - } - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - - [ name, value ] - } - - [id, parValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. 
Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - def processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' -def checkUniqueIds(Map args) { - def stopOnError = args.stopOnError == null ? args.stopOnError : true - - def idChecker = new IDChecker() - - return filter { tup -> - if (!idChecker.observe(tup[0])) { - if (stopOnError) { - error "Duplicate id: ${tup[0]}" - } else { - log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" - return false - } - } - return true - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' -// This helper file will be deprecated soon -preprocessInputsDeprecationWarningPrinted = false - -def preprocessInputsDeprecationWarning() { - if (!preprocessInputsDeprecationWarningPrinted) { - preprocessInputsDeprecationWarningPrinted = true - System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") - } -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). 
- * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - preprocessInputsDeprecationWarning() - - def config = args.config - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - def key_ = args.key ?: config.name - - // Get different parameter types (used throughout this function) - def defaultArgs = config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - map { tup -> - def id = tup[0] - def data = tup[1] - def passthrough = tup.drop(2) - - def new_data = (defaultArgs + data).collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - - if (par != null) { - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - } - - [ name, value ] - } - - [ id, new_data ] + passthrough - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' -/** - * Run a list of components on a stream of data. 
- * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component config. - * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component config. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component config. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component config. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runComponents(Map args) { - log.warn("runComponents is deprecated, use runEach instead") - assert args.components: "runComponents should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runComponents" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def id_ = args.id - - workflow runComponentsWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def comp_config = comp_.config - - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_config) - } - : input_ch - def id_ch = id_ - ? 
filter_ch | map{tup -> - // def new_id = id_(tup[0], tup[1], comp_config) - def new_id = tup[0] - if (id_ instanceof String) { - new_id = id_ - } else if (id_ instanceof Closure) { - new_id = id_(new_id, tup[1], comp_config) - } - [new_id] + tup.drop(1) - } - : filter_ch - def data_ch = id_ch | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_config) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_config) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - post_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runComponentsWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' -/** - * Run a list of components on a stream of data. - * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component itself. 
- * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component itself. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component itself. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component itself. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runEach(Map args) { - assert args.components: "runEach should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runEach" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def runIf_ = args.runIf - def id_ = args.id - - assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." - - workflow runEachWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_) - } - : input_ch - def id_ch = id_ - ? filter_ch | map{tup -> - def new_id = id_ - if (new_id instanceof Closure) { - new_id = new_id(tup[0], tup[1], comp_) - } - assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. 
Found: ${new_id.getClass()}" - [new_id] + tup.drop(1) - } - : filter_ch - def chPassthrough = null - def chRun = null - if (runIf_) { - def idRunIfBranch = id_ch.branch{ tup -> - run: runIf_(tup[0], tup[1], comp_) - passthrough: true - } - chPassthrough = idRunIfBranch.passthrough - chRun = idRunIfBranch.run - } else { - chRun = id_ch - chPassthrough = Channel.empty() - } - def data_ch = chRun | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - def return_ch = post_ch - | concat(chPassthrough) - - return_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runEachWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' -/** - * Join sourceChannel to targetChannel - * - * This function joins the sourceChannel to the targetChannel. - * However, each id in the targetChannel must be present in the - * sourceChannel. 
If _meta.join_id exists in the targetChannel, that is - * used as an id instead. If the id doesn't match any id in the sourceChannel, - * an error is thrown. - */ - -def safeJoin(targetChannel, sourceChannel, key) { - def sourceIDs = new IDChecker() - - def sourceCheck = sourceChannel - | map { tup -> - sourceIDs.observe(tup[0]) - tup - } - def targetCheck = targetChannel - | map { tup -> - def id = tup[0] - - if (!sourceIDs.contains(id)) { - error ( - "Error in module '${key}' when merging output with original state.\n" + - " Reason: output with id '${id}' could not be joined with source channel.\n" + - " If the IDs in the output channel differ from the input channel,\n" + - " please set `tup[1]._meta.join_id to the original ID.\n" + - " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + - " Unexpected ID in the output channel: '${id}'.\n" + - " Example input event: [\"id\", [input: file(...)]],\n" + - " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" - ) - } - // TODO: add link to our documentation on how to fix this - - tup - } - - sourceCheck.cross(targetChannel) - | map{ left, right -> - right + left.drop(1) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' -def _processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - // add default values to output files which haven't already got a default - if (arg.type == "file" && arg.direction == "output" && arg.default == null) { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - if (arg.multiple) { - arg.default = [arg.default] - } - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' -def addGlobalArguments(config) { - def localConfig = [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ] - // TODO: allow multiple: true in param_list? - // TODO: allow to specify a --param_list_regex to filter the param_list? - // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? - ] - ] - ] - ] - - return processConfig(_mergeMap(config, localConfig)) -} - -def _mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = _mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' -def _generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if 
(param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def _generateHelp(config) { - def fun = config - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? 
- "" : - "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -// based on Format._paragraphWrap -def _paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def helpMessage(config) { - if (params.containsKey("help") && params.help) { - def mergedConfig = addGlobalArguments(config) - def helpStr = _generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' -def processConfig(config) { - // set defaults for arguments - config.arguments = - (config.arguments ?: []).collect{_processArgument(it)} - - // set defaults for argument_group arguments - config.argument_groups = - (config.argument_groups ?: []).collect{grp -> - grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} - grp - } - - // create combined arguments list - config.allArguments = - config.arguments + - config.argument_groups.collectMany{it.arguments} - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.argument_groups - if (argGroups.any{it.name.toLowerCase() == "arguments"}) { - argGroups = argGroups.collect{ grp -> - if (grp.name.toLowerCase() == 
"arguments") { - grp = grp + [ - arguments: grp.arguments + config.arguments - ] - } - grp - } - } else { - argGroups = argGroups + [ - name: "Arguments", - arguments: config.arguments - ] - } - config.allArgumentGroups = argGroups - - config -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' - -def readConfig(file) { - def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) - processConfig(config) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' -/** - * Resolve a path relative to the current file. - * - * @param str The path to resolve, as a String. - * @param parentPath The path to resolve relative to, as a Path. - * - * @return The path that may have been resovled, as a Path. - */ -def _resolveSiblingIfNotAbsolute(str, parentPath) { - if (str !instanceof String) { - return str - } - if (!_stringIsAbsolutePath(str)) { - return parentPath.resolveSibling(str) - } else { - return file(str, hidden: true) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' -/** - * Check whether a path as a string is absolute. - * - * In the past, we tried using `file(., relative: true).isAbsolute()`, - * but the 'relative' option was added in 22.10.0. - * - * @param path The path to check, as a String. - * - * @return Whether the path is absolute, as a boolean. 
- */ -def _stringIsAbsolutePath(path) { - def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ - - assert path instanceof String - return _resolve_URL_PROTOCOL.matcher(path).matches() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' -class CustomTraceObserver implements nextflow.trace.TraceObserver { - List traces - - CustomTraceObserver(List traces) { - this.traces = traces - } - - @Override - void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } - - @Override - void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } -} - -def collectTraces() { - def traces = Collections.synchronizedList([]) - - // add custom trace observer which stores traces in the traces object - session.observers.add(new CustomTraceObserver(traces)) - - traces -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' -/** - * Performs a deep clone of the given object. - * @param x an object - */ -def deepClone(x) { - iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' -def getPublishDir() { - return params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? 
params.publishDir : - null -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' - -// Recurse upwards until we find a '.build.yaml' file -def _findBuildYamlFile(pathPossiblySymlink) { - def path = pathPossiblySymlink.toRealPath() - def child = path.resolve(".build.yaml") - if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { - return child - } else { - def parent = path.getParent() - if (parent == null) { - return null - } else { - return _findBuildYamlFile(parent) - } - } -} - -// get the root of the target folder -def getRootDir() { - def dir = _findBuildYamlFile(meta.resources_dir) - assert dir != null: "Could not find .build.yaml in the folder structure" - dir.getParent() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' -/** - * Recursively apply a function over the leaves of an object. - * @param obj The object to iterate over. - * @param fun The function to apply to each value. - * @return The object with the function applied to each value. - */ -def iterateMap(obj, fun) { - if (obj instanceof List && obj !instanceof String) { - return obj.collect{item -> - iterateMap(item, fun) - } - } else if (obj instanceof Map) { - return obj.collectEntries{key, item -> - [key.toString(), iterateMap(item, fun)] - } - } else { - return fun(obj) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' -/** - * A view for printing the event of each channel as a YAML blob. - * This is useful for debugging. - */ -def niceView() { - workflow niceViewWf { - take: input - main: - output = input - | view{toYamlBlob(it)} - emit: output - } - return niceViewWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') - - def br = java.nio.file.Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - def m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parse(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' -def readJsonBlob(str) { - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parseText(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' -// Custom constructor to modify how certain objects are parsed from YAML -class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { - Path root - - class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { - public Object construct(org.yaml.snakeyaml.nodes.Node node) { - String filename = (String) constructScalar(node); - if (root != null) { - return root.resolve(filename); - } - return java.nio.file.Paths.get(filename); - } - } - - CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { - super(options) - this.root = root - // Handling !file tag and parse it back to a File type - this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) - } -} - -def readTaggedYaml(Path path) { - def options = new org.yaml.snakeyaml.LoaderOptions() - def constructor = new CustomConstructor(options, path.getParent()) - def yaml = new org.yaml.snakeyaml.Yaml(constructor) - return yaml.load(path.text) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' -def readYamlBlob(str) { - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' -String toJsonBlob(data) { - return groovy.json.JsonOutput.toJson(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' -// Custom representer to modify how certain objects are represented in YAML -class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { - Path relativizer - - class RepresentPath implements org.yaml.snakeyaml.representer.Represent { - public String getFileName(Object obj) { - if (obj instanceof File) { - obj = ((File) obj).toPath(); - } - if (obj !instanceof Path) { - throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); - } - def path = (Path) obj; - - if (relativizer != null) { - return relativizer.relativize(path).toString() - } else { - return path.toString() - } - } - - public org.yaml.snakeyaml.nodes.Node representData(Object data) { - String filename = getFileName(data); - def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); - return representScalar(tag, filename); - } - } - CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { - super(options) - this.relativizer = relativizer - this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) - this.representers.put(Path, new RepresentPath()) - this.representers.put(File, new RepresentPath()) - } -} - -String toTaggedYamlBlob(data) { - return toRelativeTaggedYamlBlob(data, null) -} -String toRelativeTaggedYamlBlob(data, Path relativizer) { - def options = new org.yaml.snakeyaml.DumperOptions() - 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - def representer = new CustomRepresenter(options, relativizer) - def yaml = new org.yaml.snakeyaml.Yaml(representer, options) - return yaml.dump(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' -String toYamlBlob(data) { - def options = new org.yaml.snakeyaml.DumperOptions() - options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - options.setPrettyFlow(true) - def yaml = new org.yaml.snakeyaml.Yaml(options) - def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) - return yaml.dump(cleanData) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' -void writeJson(data, file) { - assert data: "writeJson: data should not be null" - assert file: "writeJson: file should not be null" - file.write(toJsonBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' -void writeYaml(data, file) { - assert data: "writeYaml: data should not be null" - assert file: "writeYaml: file should not be null" - file.write(toYamlBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' -def findStates(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - 
required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - 
return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' -def joinStates(Closure apply_) { - workflow joinStatesWf { - take: input_ch - main: - output_ch = input_ch - | toSortedList - | filter{ it.size() > 0 } - | map{ tups -> - def ids = tups.collect{it[0]} - def states = tups.collect{it[1]} - apply_(ids, states) - } - - emit: output_ch - } - return joinStatesWf -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' -def collectFiles(obj) { - if (obj instanceof java.io.File || obj instanceof Path) { - return [obj] - } else if (obj instanceof List && obj !instanceof String) { - return obj.collectMany{item -> - collectFiles(item) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectFiles(item) - } - } else { - return [] - } -} - -/** - * Recurse through a state and collect all input files and their target output filenames. - * @param obj The state to recurse through. - * @param prefix The prefix to prepend to the output filenames. - */ -def collectInputOutputPaths(obj, prefix) { - if (obj instanceof File || obj instanceof Path) { - def path = obj instanceof Path ? obj : obj.toPath() - def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" - def newFilename = prefix + ext - return [[obj, newFilename]] - } else if (obj instanceof List && obj !instanceof String) { - return obj.withIndex().collectMany{item, ix -> - collectInputOutputPaths(item, prefix + "_" + ix) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectInputOutputPaths(item, prefix + "." 
+ key) - } - } else { - return [] - } -} - -def publishStates(Map args) { - def key_ = args.get("key") - def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) - - assert key_ != null : "publishStates: key must be specified" - - workflow publishStatesWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] - - // the input files and the target output filenames - def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] - - def yamlFilename = yamlTemplate_ - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - - // TODO: do the pathnames in state_ match up with the outputFilenames_? - - // convert state to yaml blob - def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] - } - | publishStatesProc - emit: input_ch - } - return publishStatesWf -} -process publishStatesProc { - // todo: check publishpath? - publishDir path: "${getPublishDir()}/", mode: "copy" - tag "$id" - input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) - output: - tuple val(id), path{[yamlFile] + outputFiles} - script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? 
outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } - """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" -} - - -// this assumes that the state contains no other values other than those specified in the config -def publishStatesByConfig(Map args) { - def config = args.get("config") - assert config != null : "publishStatesByConfig: config must be specified" - - def key_ = args.get("key", config.name) - assert key_ != null : "publishStatesByConfig: key must be specified" - - workflow publishStatesSimpleWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] - def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] - - // TODO: allow overriding the state.yaml template - // TODO TODO: if auto.publish == "state", add output_state as an argument - def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' - def yamlFilename = yamlTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where - // - key is a String - // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] - // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) - def processedState = - config.allArguments - .findAll { it.direction == "output" } - .collectMany { par -> - def plainName_ = par.plainName - // if the state does not contain the key, it's an - // optional argument for which the component did - // not generate any output - if (!state_.containsKey(plainName_)) { - return [] - } - def value = state_[plainName_] - // if the parameter is not a file, it should be stored - // in the state as-is, but is not something that needs - // to be copied from the source path to the dest path - if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] - } - // if the orig state does not contain this filename, - // it's an optional argument for which the user specified - // that it should not be returned as a state - if (!origState_.containsKey(plainName_)) { - return [] - } - def filenameTemplate = origState_[plainName_] - // if the pararameter is multiple: true, fetch the template - if (par.multiple && filenameTemplate instanceof List) { - filenameTemplate = filenameTemplate[0] - } - // instantiate the template - def filename = filenameTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - 
.replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - if (par.multiple) { - // if the parameter is multiple: true, the filename - // should contain a wildcard '*' that is replaced with - // the index of the file - assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" - def outputPerFile = value.withIndex().collect{ val, ix -> - def filename_ix = filename.replace("*", ix.toString()) - def value_ = java.nio.file.Paths.get(filename_ix) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] - } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] - } else { - def value_ = java.nio.file.Paths.get(filename) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] - } - } - - def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} - - // convert state to yaml blob - def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] - } - | publishStatesProc - emit: input_ch - } - return publishStatesSimpleWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' -def setState(fun) { - assert fun instanceof Closure || fun instanceof Map || fun instanceof List : - "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" - - // if fun is a List, convert to map - if (fun instanceof List) { - // check whether fun is a list[string] - assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" - fun = fun.collectEntries{[it, it]} - } - - // if fun is a map, convert to closure - if (fun instanceof Map) { - // check whether fun is a map[string, string] - assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" - assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" - def funMap = fun.clone() - // turn the map into a closure to be used later on - fun = { id_, state_ -> - assert state_ instanceof Map : "Error in setState: the state is not a Map" - funMap.collectMany{newkey, origkey -> - if (state_.containsKey(origkey)) { - [[newkey, state_[origkey]]] - } else { - [] - } - }.collectEntries() - } - } - - map { tup -> - def id = tup[0] - def state = tup[1] - def unfilteredState = fun(id, state) - def newState = unfilteredState.findAll{key, val -> val != null} - [id, newState] + tup.drop(2) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - def unexpectedKeys = auto.keySet() - expectedKeys - assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" - - // check auto.simplifyInput - assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" - - // check auto.simplifyOutput - assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" - - // check auto.transcript - assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" - - // check auto.publish - assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" - - return auto.subMap(expectedKeys) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = [ - "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" - ] - def unexpectedKeys = drctv.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source 
/cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? 
":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE 
maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
- } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' -def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { - // override defaults with args - def workflowArgs = defaultWfArgs + args - - // check whether 'key' exists - assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (workflowArgs["key"] instanceof Closure) { - workflowArgs["key"] = workflowArgs["key"](meta.config.name) - } - def key = workflowArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check for any unexpected keys - def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] - def unexpectedKeys = workflowArgs.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" - workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" - workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) - - // auto define publish, if so desired - if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = getPublishDir() - - if (publishDir != null) { - workflowArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (workflowArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? 
params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] - workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { - assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" - } - } - - // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? - for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { - log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
- } - } - - // check fromState - workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) - - // check toState - workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) - - // return output - return workflowArgs -} - -def _processFromState(fromState, key_, config_) { - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState == null) { - return null - } - - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def data = fromStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (state.containsKey(origkey)) { - [[newkey, state[origkey]]] - } else if (!requiredInputNames.contains(origkey)) { - [] - } else { - throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") - } - }.collectEntries() - data - } - } - - return fromState -} - -def _processToState(toState, key_, config_) { - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key_': the output is not a Map" - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def extraEntries = toStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (output.containsKey(origkey)) { - [[newkey, output[origkey]]] - } else if (!requiredOutputNames.contains(origkey)) { - [] - } else { - throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") - } - }.collectEntries() - state + extraEntries - } - } - - return toState -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' -def _debug(workflowArgs, debugKey) { - if (workflowArgs.debug) { - view { "process '${workflowArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -// depends on: innerWorkflowFactory -def workflowFactory(Map args, Map defaultWfArgs, Map meta) { - def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) - def key_ = workflowArgs["key"] - - workflow workflowInstance { - take: input_ - - main: - def chModified = input_ - | checkUniqueIds([:]) - | _debug(workflowArgs, "input") - | map { tuple -> - tuple = deepClone(tuple) - - if (workflowArgs.map) { - tuple = workflowArgs.map(tuple) - } - if (workflowArgs.mapId) { - tuple[0] = workflowArgs.mapId(tuple[0]) - } - if (workflowArgs.mapData) { - tuple[1] = workflowArgs.mapData(tuple[1]) - } - if (workflowArgs.mapPassthrough) { - tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - if (tuple[0] instanceof GString) { - tuple[0] = tuple[0].toString() - } - assert tuple[0] instanceof CharSequence : - "Error in module '${key_}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key_}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (workflowArgs.renameKeys) { - assert workflowArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - workflowArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - - def chRun = null - def chPassthrough = null - if (workflowArgs.runIf) { - def runIfBranch = chModified.branch{ tup -> - run: workflowArgs.runIf(tup[0], tup[1]) - passthrough: true - } - chRun = runIfBranch.run - chPassthrough = runIfBranch.passthrough - } else { - chRun = chModified - chPassthrough = Channel.empty() - } - - def chRunFiltered = workflowArgs.filter ? - chRun | filter{workflowArgs.filter(it)} : - chRun - - def chArgs = workflowArgs.fromState ? 
- chRunFiltered | map{ - def new_data = workflowArgs.fromState(it.take(2)) - [it[0], new_data] - } : - chRunFiltered | map {tup -> tup.take(2)} - - // fill in defaults - def chArgsWithDefaults = chArgs - | map { tuple -> - def id_ = tuple[0] - def data_ = tuple[1] - - // TODO: could move fromState to here - - // fetch default params from functionality - def defaultArgs = meta.config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = meta.config.allArguments - .findAll { par -> - def argKey = key_ + "__" + par.plainName - params.containsKey(argKey) - } - .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = meta.config.allArguments - .findAll { data_.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data_[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs - .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} - - combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) - - [id_, combinedArgs] + tuple.drop(2) - } - - // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults - | _debug(workflowArgs, "processed") - // run workflow - | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> - - // see if output map contains metadata - def meta_ = - output_ instanceof Map && output_.containsKey("_meta") ? 
- output_["_meta"] : - [:] - def join_id = meta_.join_id ?: id_ - - // remove metadata - output_ = output_.findAll{k, v -> k != "_meta"} - - // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) - - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] - } - // | view{"chInitialOutput: ${it.take(3)}"} - - // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) - // input tuple format: [join_id, id, output, prev_state, ...] - // output tuple format: [join_id, id, new_state, ...] - | map{ tup -> - def new_state = workflowArgs.toState(tup.drop(1).take(3)) - tup.take(2) + [new_state] + tup.drop(4) - } - - if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [join_id, id, new_state] - | map{ tup -> - tup.take(3) - } - - safeJoin(chPublish, chArgsWithDefaults, key_) - // input tuple format: [join_id, id, new_state, orig_state, ...] - // output tuple format: [id, new_state, orig_state] - | map { tup -> - tup.drop(1).take(3) - } - | publishStatesByConfig(key: key_, config: meta.config) - } - - // remove join_id and meta - chReturn = chNewState - | map { tup -> - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [id, new_state, ...] 
- tup.drop(1) - } - | _debug(workflowArgs, "output") - | concat(chPassthrough) - - emit: chReturn - } - - def wf = workflowInstance.cloneWithName(key_) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs, workflowArgs, meta) - } - // add config to module for later introspection - wf.metaClass.config = meta.config - - return wf -} - -nextflow.enable.dsl=2 - -// START COMPONENT-SPECIFIC CODE - -// create meta object -meta = [ - "resources_dir": moduleDir.toRealPath().normalize(), - "config": processConfig(readJsonBlob('''{ - "name" : "fastqc", - "version" : "main", - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "boolean", - "name" : "--paired", - "description" : "Paired fastq files or not?", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--input", - "description" : "Input fastq files, either one or two (paired)", - "example" : [ - "sample.fastq" - ], - "must_exist" : true, - "create_parent" : true, - "required" : true, - "direction" : "input", - "multiple" : true, - "multiple_sep" : "," - } - ] - }, - { - "name" : "Output", - "arguments" : [ - { - "type" : "file", - "name" : "--fastqc_html_1", - "description" : "FastQC HTML report for read 1.", - "default" : [ - "$id.read_1.fastqc.html" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--fastqc_html_2", - "description" : "FastQC HTML report for read 2.", - "default" : [ - "$id.read_2.fastqc.html" - ], - "must_exist" : false, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--fastqc_zip_1", - "description" : "FastQC report archive for read 1.", - "default" : [ - "$id.read_1.fastqc.zip" - ], - 
"must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--fastqc_zip_2", - "description" : "FastQC report archive for read 2.", - "default" : [ - "$id.read_2.fastqc.zip" - ], - "must_exist" : false, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - } - ] - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true - } - ], - "description" : "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz" - }, - { - "type" : "file", - "path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz" - } - ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/fastqc/main.nf", - "modules/nf-core/fastqc/meta.yml" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "runners" : [ - { - "type" : "executable", - "id" : "executable", - "docker_setup_strategy" : "ifneedbepullelsecachedbuild" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory 
= 1000000000.B", - "mem2gb" : "memory = 2000000000.B", - "mem5gb" : "memory = 5000000000.B", - "mem10gb" : "memory = 10000000000.B", - "mem20gb" : "memory = 20000000000.B", - "mem50gb" : "memory = 50000000000.B", - "mem100gb" : "memory = 100000000000.B", - "mem200gb" : "memory = 200000000000.B", - "mem500gb" : "memory = 500000000000.B", - "mem1tb" : "memory = 1000000000000.B", - "mem2tb" : "memory = 2000000000000.B", - "mem5tb" : "memory = 5000000000000.B", - "mem10tb" : "memory = 10000000000000.B", - "mem20tb" : "memory = 20000000000000.B", - "mem50tb" : "memory = 50000000000000.B", - "mem100tb" : "memory = 100000000000000.B", - "mem200tb" : "memory = 200000000000000.B", - "mem500tb" : "memory = 500000000000000.B", - "mem1gib" : "memory = 1073741824.B", - "mem2gib" : "memory = 2147483648.B", - "mem4gib" : "memory = 4294967296.B", - "mem8gib" : "memory = 8589934592.B", - "mem16gib" : "memory = 17179869184.B", - "mem32gib" : "memory = 34359738368.B", - "mem64gib" : "memory = 68719476736.B", - "mem128gib" : "memory = 137438953472.B", - "mem256gib" : "memory = 274877906944.B", - "mem512gib" : "memory = 549755813888.B", - "mem1tib" : "memory = 1099511627776.B", - "mem2tib" : "memory = 2199023255552.B", - "mem4tib" : "memory = 4398046511104.B", - "mem8tib" : "memory = 8796093022208.B", - "mem16tib" : "memory = 17592186044416.B", - "mem32tib" : "memory = 35184372088832.B", - "mem64tib" : "memory = 70368744177664.B", - "mem128tib" : "memory = 140737488355328.B", - "mem256tib" : "memory = 281474976710656.B", - "mem512tib" : "memory = 562949953421312.B", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "engines" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_registry" : 
"images.viash-hub.com", - "target_tag" : "main", - "namespace_separator" : "/", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "fastqc" - ], - "interactive" : false - } - ] - }, - { - "type" : "native", - "id" : "native" - } - ], - "build_info" : { - "config" : "/workdir/root/repo/src/fastqc/config.vsh.yaml", - "runner" : "nextflow", - "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/fastqc", - "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" - }, - "package_config" : { - "name" : "rnaseq", - "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", - "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", - ".engines += { type: \\"native\\" }", - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", - ".engines[.type == 'docker'].target_tag := 'main'" - ], - "organization" : "vsh" - } -}''')) -] - -// resolve dependencies dependencies (if any) - - -// inner workflow -// inner workflow hook -def innerWorkflowFactory(args) { - def rawScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_HTML_1+x} ]; then echo "${VIASH_PAR_FASTQC_HTML_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_html_1='&'#" ; else echo "# par_fastqc_html_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_HTML_2+x} ]; then echo "${VIASH_PAR_FASTQC_HTML_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_html_2='&'#" ; else echo "# par_fastqc_html_2="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_ZIP_1+x} ]; then echo "${VIASH_PAR_FASTQC_ZIP_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_zip_1='&'#" ; else echo "# par_fastqc_zip_1="; fi ) -$( if [ ! -z ${VIASH_PAR_FASTQC_ZIP_2+x} ]; then echo "${VIASH_PAR_FASTQC_ZIP_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastqc_zip_2='&'#" ; else echo "# par_fastqc_zip_2="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -function clean_up { - rm -rf "\\$tmpdir" -} -trap clean_up EXIT - -tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX") - -IFS="," read -ra input <<< \\$par_input -count=\\${#input[@]} - -if \\$par_paired; then - echo "Paired - \\$count" - if [ \\$count -ne 2 ]; then - echo "Paired end input requires two files" - exit 1 - fi -else - echo "Not Paired - \\$count" - if [ \\$count -ne 1 ]; then - echo "Single end input requires one file" - exit 1 - fi -fi - -fastqc -o \\$tmpdir \\${input[*]} - -file1=\\$(basename -- "\\${input[0]}") -read1="\\${file1%.fastq*}" -[[ -e "\\${tmpdir}/\\${read1}_fastqc.html" ]] && cp "\\${tmpdir}/\\${read1}_fastqc.html" \\$par_fastqc_html_1 -[[ -e "\\${tmpdir}/\\${read1}_fastqc.zip" ]] && cp "\\${tmpdir}/\\${read1}_fastqc.zip" \\$par_fastqc_zip_1 - -if \\$par_paired; then - file2=\\$(basename -- "\\${input[1]}") - read2="\\${file2%.fastq*}" - [[ -e "\\${tmpdir}/\\${read2}_fastqc.html" ]] && cp "\\${tmpdir}/\\${read2}_fastqc.html" \\$par_fastqc_html_2 - [[ -e "\\${tmpdir}/\\${read2}_fastqc.zip" ]] && cp "\\${tmpdir}/\\${read2}_fastqc.zip" \\$par_fastqc_zip_2 -fi -VIASHMAIN -bash "$tempscript" -''' - - return vdsl3WorkflowFactory(args, meta, rawScript) -} - - - -/** - * Generate a workflow for VDSL3 modules. - * - * This function is called by the workflowFactory() function. - * - * Input channel: [id, input_map] - * Output channel: [id, output_map] - * - * Internally, this workflow will convert the input channel - * to a format which the Nextflow module will be able to handle. 
- */ -def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { - def key = args["key"] - def processObj = null - - workflow processWf { - take: input_ - main: - - if (processObj == null) { - processObj = _vdsl3ProcessFactory(args, meta, rawScript) - } - - output_ = input_ - | map { tuple -> - def id = tuple[0] - def data_ = tuple[1] - - if (workflow.stubRun) { - // add id if missing - data_ = [id: 'stub'] + data_ - } - - // process input files separately - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { par -> - def val = data_.containsKey(par.plainName) ? data_[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = meta.config.allArguments - .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = data_[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val - .replaceAll('\\$id', id) - .replaceAll('\\$\\{id\\}', id) - .replaceAll('\\$key', key) - .replaceAll('\\$\\{key\\}', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] - } - | processObj - | map { output -> - def outputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - 
.collectEntries{ index, par -> - def out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - outputFiles.removeAll{it.value == null} - - [ output[0], outputFiles ] - } - emit: output_ - } - - return processWf -} - -// depends on: session? -def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { - // autodetect process key - def wfKey = workflowArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def scriptMeta = nextflow.script.ScriptMeta.current() - def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." 
- } - - // subset directives and convert to list of tuples - def drctv = workflowArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } - .join() - - def outputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' + par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? 
- if (workflowArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = meta.config.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! - def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" - |export VIASH_META_NAME="${meta.config.name}" - |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) - | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) - | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) - | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) - | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (workflowArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // write process to temp file - def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") - addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } - tempFile.text = procStr - - // create process from temp file - def binding = new nextflow.script.ScriptBinding([:]) - def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) - .setModule(true) - .setBinding(binding) - def moduleScript = parser.runScript(tempFile) - .getScript() - - // register module in meta - def module = new 
nextflow.script.IncludeDef.Module(name: procKey) - scriptMeta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return scriptMeta.getProcess(procKey) -} - -// defaults -meta["defaults"] = [ - // key to be used to trace the process and determine output names - key: null, - - // fixed arguments to be passed to script - args: [:], - - // default directives - directives: readJsonBlob('''{ - "container" : { - "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/fastqc", - "tag" : "main" - }, - "tag" : "$id" -}'''), - - // auto settings - auto: readJsonBlob('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Choose whether or not to run the component on the tuple if the condition is true. - // Otherwise, the tuple will be passed through. - // Example: `{ tup -> tup[0] != "skip_this" }` - runIf: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. 
- // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// initialise default workflow -meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) - -// add workflow to environment -nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) - -// anonymous workflow for running this module as a standalone -workflow { - // add id argument if it's not already in the config - // TODO: deep copy - def newConfig = deepClone(meta.config) - def newParams = deepClone(params) - - def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} - if (!argsContainsId) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - newConfig.arguments.add(0, idArg) - newConfig = processConfig(newConfig) - } - if (!newParams.containsKey("id")) { - newParams.id = "run" - } - - helpMessage(newConfig) - - channelFromParams(newParams, newConfig) - // make sure id is not in the state if id is not in the args - | map {id, state -> - if (!argsContainsId) { - [id, state.findAll{k, v -> k != "id"}] - } else { - [id, state] - } - } - | meta.workflow.run( - auto: [ publish: "state" ] - ) -} - -// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/fastqc/nextflow_schema.json b/target/nextflow/fastqc/nextflow_schema.json deleted file mode 100644 index 868796b..0000000 --- a/target/nextflow/fastqc/nextflow_schema.json +++ /dev/null @@ -1,139 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "fastqc", -"description": "Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. 
This component can take one or more files (by means of shell globbing) or a complete directory.\n", -"type": "object", -"definitions": { - - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "paired": { - "type": - "boolean", - "description": "Type: `boolean`, default: `false`. Paired fastq files or not?", - "help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?" - , - "default":false - } - - - , - "input": { - "type": - "string", - "description": "Type: List of `file`, required, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)", - "help_text": "Type: List of `file`, required, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)" - - } - - -} -}, - - - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - - "fastqc_html_1": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_1.html`. FastQC HTML report for read 1." - , - "default":"$id.$key.fastqc_html_1.html" - } - - - , - "fastqc_html_2": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_html_2.html`. FastQC HTML report for read 2." - , - "default":"$id.$key.fastqc_html_2.html" - } - - - , - "fastqc_zip_1": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_1.zip`. FastQC report archive for read 1." - , - "default":"$id.$key.fastqc_zip_1.zip" - } - - - , - "fastqc_zip_2": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. 
FastQC report archive for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastqc_zip_2.zip`. FastQC report archive for read 2." - , - "default":"$id.$key.fastqc_zip_2.zip" - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. 
Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/output" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/fq_subsample/.config.vsh.yaml b/target/nextflow/fq_subsample/.config.vsh.yaml deleted file mode 100644 index 6e33193..0000000 --- a/target/nextflow/fq_subsample/.config.vsh.yaml +++ /dev/null @@ -1,207 +0,0 @@ -name: "fq_subsample" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--input" - description: "Input fastq files to subsample" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - - type: "string" - name: "--extra_args" - description: "Extra arguments to pass to fq subsample" - info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Input" - arguments: - - type: "file" - name: "--output_1" - description: "Sampled read 1 fastq files" - info: null - default: - - "$id.read_1.subsampled.fastq" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output_2" - description: "Sampled read 2 fastq files" - info: null - 
default: - - "$id.read_2.subsampled.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "fq subsample outputs a subset of records from single or paired FASTQ\ - \ files. This requires a seed (--seed) to be set in ext.args\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/fq/subsample/main.nf" - - "modules/nf-core/fq/subsample/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - 
mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "docker" - run: - - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\ - \ && \\\napt-get update && \\\napt-get install -y --no-install-recommends build-essential\ - \ git-all curl && \\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\n.\ - \ \"$HOME/.cargo/env\" && \\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git\ - \ && \\\nmv fq /usr/local/ && cd /usr/local/fq && \\\ncargo install --locked\ - \ --path . 
&& \\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n" - env: - - "TZ=Europe/Brussels" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/fq_subsample/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/fq_subsample" - executable: "target/nextflow/fq_subsample/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/fq_subsample/nextflow_schema.json b/target/nextflow/fq_subsample/nextflow_schema.json deleted file mode 100644 index 0a22405..0000000 --- a/target/nextflow/fq_subsample/nextflow_schema.json +++ /dev/null @@ -1,117 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "fq_subsample", -"description": "fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n", -"type": "object", -"definitions": { - - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "input": { - "type": - "string", - "description": "Type: List of `file`, multiple_sep: `\";\"`. 
Input fastq files to subsample", - "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input fastq files to subsample" - - } - - - , - "extra_args": { - "type": - "string", - "description": "Type: `string`, default: ``. Extra arguments to pass to fq subsample", - "help_text": "Type: `string`, default: ``. Extra arguments to pass to fq subsample" - , - "default":"" - } - - -} -}, - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "output_1": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.output_1.fastq`. Sampled read 1 fastq files", - "help_text": "Type: `file`, default: `$id.$key.output_1.fastq`. Sampled read 1 fastq files" - , - "default":"$id.$key.output_1.fastq" - } - - - , - "output_2": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.output_2.fastq`. Sampled read 2 fastq files", - "help_text": "Type: `file`, default: `$id.$key.output_2.fastq`. Sampled read 2 fastq files" - , - "default":"$id.$key.output_2.fastq" - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. 
A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/getchromsizes/.config.vsh.yaml b/target/nextflow/getchromsizes/.config.vsh.yaml index 5fd4996..516c4f9 100644 --- a/target/nextflow/getchromsizes/.config.vsh.yaml +++ b/target/nextflow/getchromsizes/.config.vsh.yaml @@ -70,7 +70,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -167,8 +167,8 @@ build_info: output: "target/nextflow/getchromsizes" executable: "target/nextflow/getchromsizes/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -179,7 +179,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/getchromsizes/main.nf b/target/nextflow/getchromsizes/main.nf index 558cede..eaee980 100644 --- a/target/nextflow/getchromsizes/main.nf +++ b/target/nextflow/getchromsizes/main.nf @@ -2901,7 +2901,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3013,8 +3013,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/getchromsizes", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3031,7 +3031,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/gtf2bed/.config.vsh.yaml b/target/nextflow/gtf2bed/.config.vsh.yaml index ec0e69d..e37b0e6 100644 --- a/target/nextflow/gtf2bed/.config.vsh.yaml +++ b/target/nextflow/gtf2bed/.config.vsh.yaml @@ -51,7 +51,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -145,8 +145,8 @@ build_info: output: "target/nextflow/gtf2bed" executable: "target/nextflow/gtf2bed/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -157,7 +157,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/gtf2bed/main.nf b/target/nextflow/gtf2bed/main.nf index 37a312d..4f90219 100644 --- a/target/nextflow/gtf2bed/main.nf +++ b/target/nextflow/gtf2bed/main.nf @@ -2882,7 +2882,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -2995,8 +2995,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/gtf2bed", "viash_version" : "0.9.0", - "git_commit" : 
"ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3013,7 +3013,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/gtf_filter/.config.vsh.yaml b/target/nextflow/gtf_filter/.config.vsh.yaml index deb2a32..71979d1 100644 --- a/target/nextflow/gtf_filter/.config.vsh.yaml +++ b/target/nextflow/gtf_filter/.config.vsh.yaml @@ -66,7 +66,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -155,8 +155,8 @@ build_info: output: "target/nextflow/gtf_filter" executable: "target/nextflow/gtf_filter/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -167,7 +167,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/gtf_filter/main.nf b/target/nextflow/gtf_filter/main.nf index d72bb20..35532f0 100644 --- a/target/nextflow/gtf_filter/main.nf +++ b/target/nextflow/gtf_filter/main.nf @@ -2899,7 +2899,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3003,8 +3003,8 @@ meta = [ "engine" : "docker|native", "output" : 
"/workdir/root/repo/target/nextflow/gtf_filter", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3021,7 +3021,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/gunzip/.config.vsh.yaml b/target/nextflow/gunzip/.config.vsh.yaml index 2174f74..8f283c6 100644 --- a/target/nextflow/gunzip/.config.vsh.yaml +++ b/target/nextflow/gunzip/.config.vsh.yaml @@ -50,7 +50,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -144,8 +144,8 @@ build_info: output: "target/nextflow/gunzip" executable: "target/nextflow/gunzip/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -156,7 +156,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/gunzip/main.nf b/target/nextflow/gunzip/main.nf index b8f2e1d..95c0a81 100644 --- a/target/nextflow/gunzip/main.nf +++ b/target/nextflow/gunzip/main.nf @@ -2879,7 +2879,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -2992,8 +2992,8 @@ meta = [ "engine" : 
"docker|native", "output" : "/workdir/root/repo/target/nextflow/gunzip", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3010,7 +3010,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/kallisto/kallisto_index/nextflow_schema.json b/target/nextflow/kallisto/kallisto_index/nextflow_schema.json deleted file mode 100644 index 837bc7c..0000000 --- a/target/nextflow/kallisto/kallisto_index/nextflow_schema.json +++ /dev/null @@ -1,105 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "kallisto_index", -"description": "Create Kallisto index.\n", -"type": "object", -"definitions": { - - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "transcriptome_fasta": { - "type": - "string", - "description": "Type: `file`. ", - "help_text": "Type: `file`. " - - } - - - , - "pseudo_aligner_kmer_size": { - "type": - "integer", - "description": "Type: `integer`. Kmer length passed to indexing step of pseudoaligners", - "help_text": "Type: `integer`. Kmer length passed to indexing step of pseudoaligners." - - } - - -} -}, - - - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - - "kallisto_index": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.kallisto_index.kallisto_index`. ", - "help_text": "Type: `file`, default: `$id.$key.kallisto_index.kallisto_index`. 
" - , - "default":"$id.$key.kallisto_index.kallisto_index" - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/output" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml index afa6beb..0591afb 100644 --- a/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml +++ b/target/nextflow/multiqc_custom_biotype/.config.vsh.yaml @@ -76,7 +76,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -165,8 +165,8 @@ build_info: output: "target/nextflow/multiqc_custom_biotype" executable: "target/nextflow/multiqc_custom_biotype/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -177,7 +177,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/multiqc_custom_biotype/main.nf b/target/nextflow/multiqc_custom_biotype/main.nf index 26d36e3..bdbd248 100644 --- a/target/nextflow/multiqc_custom_biotype/main.nf +++ b/target/nextflow/multiqc_custom_biotype/main.nf @@ -2908,7 
+2908,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3012,8 +3012,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/multiqc_custom_biotype", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3030,7 +3030,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/picard_markduplicates/.config.vsh.yaml b/target/nextflow/picard_markduplicates/.config.vsh.yaml index d0dc8b4..e3d3e65 100644 --- a/target/nextflow/picard_markduplicates/.config.vsh.yaml +++ b/target/nextflow/picard_markduplicates/.config.vsh.yaml @@ -110,7 +110,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -207,8 +207,8 @@ build_info: output: "target/nextflow/picard_markduplicates" executable: "target/nextflow/picard_markduplicates/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -219,7 +219,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/picard_markduplicates/main.nf 
b/target/nextflow/picard_markduplicates/main.nf index 546cbe0..0cda539 100644 --- a/target/nextflow/picard_markduplicates/main.nf +++ b/target/nextflow/picard_markduplicates/main.nf @@ -2948,7 +2948,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3063,8 +3063,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/picard_markduplicates", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3081,7 +3081,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/prepare_multiqc_input/.config.vsh.yaml b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml index f564fd1..a990214 100644 --- a/target/nextflow/prepare_multiqc_input/.config.vsh.yaml +++ b/target/nextflow/prepare_multiqc_input/.config.vsh.yaml @@ -320,7 +320,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -409,8 +409,8 @@ build_info: output: "target/nextflow/prepare_multiqc_input" executable: "target/nextflow/prepare_multiqc_input/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -421,7 +421,7 @@ 
package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/prepare_multiqc_input/main.nf b/target/nextflow/prepare_multiqc_input/main.nf index 35bf19c..aa0feaa 100644 --- a/target/nextflow/prepare_multiqc_input/main.nf +++ b/target/nextflow/prepare_multiqc_input/main.nf @@ -3173,7 +3173,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3277,8 +3277,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/prepare_multiqc_input", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3295,7 +3295,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml index 0d8f12e..701953f 100644 --- a/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml +++ b/target/nextflow/preprocess_transcripts_fasta/.config.vsh.yaml @@ -49,7 +49,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -138,8 +138,8 @@ build_info: output: "target/nextflow/preprocess_transcripts_fasta" executable: "target/nextflow/preprocess_transcripts_fasta/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + 
git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -150,7 +150,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/preprocess_transcripts_fasta/main.nf b/target/nextflow/preprocess_transcripts_fasta/main.nf index 849206a..e879dee 100644 --- a/target/nextflow/preprocess_transcripts_fasta/main.nf +++ b/target/nextflow/preprocess_transcripts_fasta/main.nf @@ -2878,7 +2878,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -2982,8 +2982,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/preprocess_transcripts_fasta", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3000,7 +3000,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/preseq_lcextrap/.config.vsh.yaml b/target/nextflow/preseq_lcextrap/.config.vsh.yaml index 8f8ce58..64b1d10 100644 --- a/target/nextflow/preseq_lcextrap/.config.vsh.yaml +++ b/target/nextflow/preseq_lcextrap/.config.vsh.yaml @@ -70,7 +70,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -191,8 +191,8 @@ build_info: output: "target/nextflow/preseq_lcextrap" executable: "target/nextflow/preseq_lcextrap/main.nf" 
viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -203,7 +203,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/preseq_lcextrap/main.nf b/target/nextflow/preseq_lcextrap/main.nf index e7ff41c..20a8630 100644 --- a/target/nextflow/preseq_lcextrap/main.nf +++ b/target/nextflow/preseq_lcextrap/main.nf @@ -2902,7 +2902,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3035,8 +3035,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/preseq_lcextrap", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3053,7 +3053,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml b/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml deleted file mode 100644 index 0ff07e4..0000000 --- a/target/nextflow/rsem/rsem_calculate_expression/.config.vsh.yaml +++ /dev/null @@ -1,329 +0,0 @@ -name: "rsem_calculate_expression" -namespace: "rsem" -version: "main" -argument_groups: -- name: "Input" - arguments: - - 
type: "string" - name: "--id" - description: "Sample ID." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--strandedness" - description: "Sample strand-specificity. Must be one of unstranded, forward, reverse" - info: null - required: false - choices: - - "forward" - - "reverse" - - "unstranded" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--paired" - description: "Paired-end reads or not?" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input reads for quantification." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: true - multiple_sep: ";" - - type: "file" - name: "--index" - description: "RSEM index." - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--extra_args" - description: "Extra rsem-calculate-expression arguments in addition to the defaults." 
- info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--counts_gene" - description: "Expression counts on gene level" - info: null - example: - - "sample.genes.results" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--counts_transcripts" - description: "Expression counts on transcript level" - info: null - example: - - "sample.isoforms.results" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--stat" - description: "RSEM statistics" - info: null - example: - - "sample.stat" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--logs" - description: "RSEM logs" - info: null - example: - - "sample.log" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_star" - description: "BAM file generated by STAR (optional)" - info: null - example: - - "sample.STAR.genome.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_genome" - description: "Genome BAM file (optional)" - info: null - example: - - "sample.genome.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--bam_transcript" - description: "Transcript BAM file (optional)" - info: null - example: - - "sample.transcript.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "Calculate expression 
with RSEM.\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "SRR6357070_1.fastq.gz" -- type: "file" - path: "SRR6357070_2.fastq.gz" -- type: "file" - path: "rsem.tar.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/rsem/calculateexpression/main.nf" - - "modules/nf-core/rsem/calculateexpression/meta.yml" - last_sha: "92b2a7857de1dda9d1c19a088941fc81e2976ff7" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 
549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "build-essential" - - "gcc" - - "g++" - - "make" - - "wget" - - "zlib1g-dev" - - "unzip" - - "xxd" - - "perl" - - "r-base" - - "bowtie2" - - "python3-pip" - - "git" - interactive: false - - type: "docker" - run: - - "ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone\ - \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\ - \ && \\\nunzip ${STAR_VERSION}.zip && \\\ncd STAR-${STAR_VERSION}/source &&\ - \ \\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\ncp STAR /usr/local/bin\ - \ && \\\ncd /tmp && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip\ - \ && \\\nunzip v${RSEM_VERSION}.zip && \\\ncd RSEM-${RSEM_VERSION} && \\\nmake\ - \ && \\\nmake install && \\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\ - \ && \\\nrm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \\\n\ - cd && \\\napt-get clean && \\\necho 'export PATH=$PATH:/usr/local/bin' >> /etc/profile\ - \ && \\\necho 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \\\n/bin/bash\ - \ -c 
\"source /etc/profile && source ~/.bashrc && echo $PATH && which STAR\"\ - \n" - env: - - "STAR_VERSION=2.7.11b" - - "RSEM_VERSION=1.3.3" - - "TZ=Europe/Brussels" - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/rsem/rsem_calculate_expression/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/rsem/rsem_calculate_expression" - executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json b/target/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json deleted file mode 100644 index 81d4502..0000000 --- a/target/nextflow/rsem/rsem_calculate_expression/nextflow_schema.json +++ /dev/null @@ -1,213 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "rsem_calculate_expression", -"description": "Calculate expression with RSEM.\n", -"type": "object", -"definitions": { - - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "id": { - "type": - "string", - "description": "Type: 
`string`. Sample ID", - "help_text": "Type: `string`. Sample ID." - - } - - - , - "strandedness": { - "type": - "string", - "description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity", - "help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, reverse", - "enum": ["forward", "reverse", "unstranded"] - - - } - - - , - "paired": { - "type": - "boolean", - "description": "Type: `boolean`. Paired-end reads or not?", - "help_text": "Type: `boolean`. Paired-end reads or not?" - - } - - - , - "input": { - "type": - "string", - "description": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification", - "help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification." - - } - - - , - "index": { - "type": - "string", - "description": "Type: `file`. RSEM index", - "help_text": "Type: `file`. RSEM index." - - } - - - , - "extra_args": { - "type": - "string", - "description": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the defaults", - "help_text": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the defaults." - - } - - -} -}, - - - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - - "counts_gene": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `sample.genes.results`. Expression counts on gene level", - "help_text": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `sample.genes.results`. Expression counts on gene level" - , - "default":"$id.$key.counts_gene.results" - } - - - , - "counts_transcripts": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `sample.isoforms.results`. 
Expression counts on transcript level", - "help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `sample.isoforms.results`. Expression counts on transcript level" - , - "default":"$id.$key.counts_transcripts.results" - } - - - , - "stat": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.stat.stat`, example: `sample.stat`. RSEM statistics", - "help_text": "Type: `file`, default: `$id.$key.stat.stat`, example: `sample.stat`. RSEM statistics" - , - "default":"$id.$key.stat.stat" - } - - - , - "logs": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.logs.log`, example: `sample.log`. RSEM logs", - "help_text": "Type: `file`, default: `$id.$key.logs.log`, example: `sample.log`. RSEM logs" - , - "default":"$id.$key.logs.log" - } - - - , - "bam_star": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `sample.STAR.genome.bam`. BAM file generated by STAR (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `sample.STAR.genome.bam`. BAM file generated by STAR (optional)" - , - "default":"$id.$key.bam_star.bam" - } - - - , - "bam_genome": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `sample.genome.bam`. Genome BAM file (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `sample.genome.bam`. Genome BAM file (optional)" - , - "default":"$id.$key.bam_genome.bam" - } - - - , - "bam_transcript": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `sample.transcript.bam`. Transcript BAM file (optional)", - "help_text": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `sample.transcript.bam`. 
Transcript BAM file (optional)" - , - "default":"$id.$key.bam_transcript.bam" - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. 
Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/output" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/rsem/rsem_merge_counts/.config.vsh.yaml b/target/nextflow/rsem_merge_counts/.config.vsh.yaml similarity index 93% rename from target/nextflow/rsem/rsem_merge_counts/.config.vsh.yaml rename to target/nextflow/rsem_merge_counts/.config.vsh.yaml index d096f9e..4a8231f 100644 --- a/target/nextflow/rsem/rsem_merge_counts/.config.vsh.yaml +++ b/target/nextflow/rsem_merge_counts/.config.vsh.yaml @@ -1,5 +1,4 @@ name: "rsem_merge_counts" -namespace: "rsem" version: "main" argument_groups: - name: "Input" @@ -93,7 +92,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -176,14 +175,14 @@ engines: - type: "native" id: "native" build_info: - config: "src/rsem/rsem_merge_counts/config.vsh.yaml" + config: "src/rsem_merge_counts/config.vsh.yaml" runner: "nextflow" engine: "docker|native" - output: "target/nextflow/rsem/rsem_merge_counts" - executable: "target/nextflow/rsem/rsem_merge_counts/main.nf" + output: "target/nextflow/rsem_merge_counts" + executable: "target/nextflow/rsem_merge_counts/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: 
"https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -194,7 +193,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rsem/rsem_merge_counts/main.nf b/target/nextflow/rsem_merge_counts/main.nf similarity index 99% rename from target/nextflow/rsem/rsem_merge_counts/main.nf rename to target/nextflow/rsem_merge_counts/main.nf index ff961e1..ae2fc39 100644 --- a/target/nextflow/rsem/rsem_merge_counts/main.nf +++ b/target/nextflow/rsem_merge_counts/main.nf @@ -2805,7 +2805,6 @@ meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ "name" : "rsem_merge_counts", - "namespace" : "rsem", "version" : "main", "argument_groups" : [ { @@ -2924,7 +2923,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3023,13 +3022,13 @@ meta = [ } ], "build_info" : { - "config" : "/workdir/root/repo/src/rsem/rsem_merge_counts/config.vsh.yaml", + "config" : "/workdir/root/repo/src/rsem_merge_counts/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/rsem/rsem_merge_counts", + "output" : "/workdir/root/repo/target/nextflow/rsem_merge_counts", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3046,7 +3045,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3495,7 +3494,7 @@ 
meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/rsem/rsem_merge_counts", + "image" : "vsh/rnaseq/rsem_merge_counts", "tag" : "main" }, "tag" : "$id" diff --git a/target/nextflow/rsem/rsem_merge_counts/nextflow.config b/target/nextflow/rsem_merge_counts/nextflow.config similarity index 99% rename from target/nextflow/rsem/rsem_merge_counts/nextflow.config rename to target/nextflow/rsem_merge_counts/nextflow.config index 244e687..7ce84a9 100644 --- a/target/nextflow/rsem/rsem_merge_counts/nextflow.config +++ b/target/nextflow/rsem_merge_counts/nextflow.config @@ -1,5 +1,5 @@ manifest { - name = 'rsem/rsem_merge_counts' + name = 'rsem_merge_counts' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'main' diff --git a/target/nextflow/rsem/rsem_merge_counts/nextflow_schema.json b/target/nextflow/rsem_merge_counts/nextflow_schema.json similarity index 100% rename from target/nextflow/rsem/rsem_merge_counts/nextflow_schema.json rename to target/nextflow/rsem_merge_counts/nextflow_schema.json diff --git a/target/nextflow/rseqc/rseqc_innerdistance/nextflow.config b/target/nextflow/rseqc/rseqc_innerdistance/nextflow.config deleted file mode 100644 index 02d4e15..0000000 --- a/target/nextflow/rseqc/rseqc_innerdistance/nextflow.config +++ /dev/null @@ -1,125 +0,0 @@ -manifest { - name = 'rseqc/rseqc_innerdistance' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = 'main' - description = 'Calculate inner distance between read pairs. 
\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1000000000.B } - withLabel: mem2gb { memory = 2000000000.B } - withLabel: mem5gb { memory = 5000000000.B } - withLabel: mem10gb { memory = 10000000000.B } - withLabel: mem20gb { memory = 20000000000.B } - withLabel: mem50gb { memory = 50000000000.B } - withLabel: mem100gb { memory = 100000000000.B } - withLabel: mem200gb { memory = 200000000000.B } - withLabel: mem500gb { memory = 500000000000.B } - withLabel: mem1tb { memory = 1000000000000.B } - withLabel: mem2tb { memory = 2000000000000.B } - withLabel: mem5tb { memory = 5000000000000.B } - withLabel: mem10tb { memory = 10000000000000.B } - withLabel: mem20tb { memory = 20000000000000.B } - 
withLabel: mem50tb { memory = 50000000000000.B } - withLabel: mem100tb { memory = 100000000000000.B } - withLabel: mem200tb { memory = 200000000000000.B } - withLabel: mem500tb { memory = 500000000000000.B } - withLabel: mem1gib { memory = 1073741824.B } - withLabel: mem2gib { memory = 2147483648.B } - withLabel: mem4gib { memory = 4294967296.B } - withLabel: mem8gib { memory = 8589934592.B } - withLabel: mem16gib { memory = 17179869184.B } - withLabel: mem32gib { memory = 34359738368.B } - withLabel: mem64gib { memory = 68719476736.B } - withLabel: mem128gib { memory = 137438953472.B } - withLabel: mem256gib { memory = 274877906944.B } - withLabel: mem512gib { memory = 549755813888.B } - withLabel: mem1tib { memory = 1099511627776.B } - withLabel: mem2tib { memory = 2199023255552.B } - withLabel: mem4tib { memory = 4398046511104.B } - withLabel: mem8tib { memory = 8796093022208.B } - withLabel: mem16tib { memory = 17592186044416.B } - withLabel: mem32tib { memory = 35184372088832.B } - withLabel: mem64tib { memory = 70368744177664.B } - withLabel: mem128tib { memory = 140737488355328.B } - withLabel: mem256tib { memory = 281474976710656.B } - withLabel: mem512tib { memory = 562949953421312.B } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml index f8b3fca..100f49c 100644 --- a/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_junctionannotation/.config.vsh.yaml @@ -160,7 +160,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - 
type: "vsh" name: "craftbox" @@ -260,8 +260,8 @@ build_info: output: "target/nextflow/rseqc/rseqc_junctionannotation" executable: "target/nextflow/rseqc/rseqc_junctionannotation/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -272,7 +272,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rseqc/rseqc_junctionannotation/main.nf b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf index 72964d9..e568727 100644 --- a/target/nextflow/rseqc/rseqc_junctionannotation/main.nf +++ b/target/nextflow/rseqc/rseqc_junctionannotation/main.nf @@ -3006,7 +3006,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3128,8 +3128,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_junctionannotation", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3146,7 +3146,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml index 
90b63d8..315a79d 100644 --- a/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/.config.vsh.yaml @@ -149,7 +149,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -249,8 +249,8 @@ build_info: output: "target/nextflow/rseqc/rseqc_junctionsaturation" executable: "target/nextflow/rseqc/rseqc_junctionsaturation/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -261,7 +261,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf index d47feed..2187a7a 100644 --- a/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf +++ b/target/nextflow/rseqc/rseqc_junctionsaturation/main.nf @@ -2991,7 +2991,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3113,8 +3113,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_junctionsaturation", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : 
"rnaseq", @@ -3131,7 +3131,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml index b1217be..b771239 100644 --- a/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_readdistribution/.config.vsh.yaml @@ -63,7 +63,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -162,8 +162,8 @@ build_info: output: "target/nextflow/rseqc/rseqc_readdistribution" executable: "target/nextflow/rseqc/rseqc_readdistribution/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -174,7 +174,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rseqc/rseqc_readdistribution/main.nf b/target/nextflow/rseqc/rseqc_readdistribution/main.nf index c58ba42..95e3513 100644 --- a/target/nextflow/rseqc/rseqc_readdistribution/main.nf +++ b/target/nextflow/rseqc/rseqc_readdistribution/main.nf @@ -2896,7 +2896,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3017,8 +3017,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_readdistribution", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3035,7 +3035,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml index da7c41b..12aabcf 100644 --- a/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_readduplication/.config.vsh.yaml @@ -111,7 +111,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -211,8 +211,8 @@ build_info: output: "target/nextflow/rseqc/rseqc_readduplication" executable: "target/nextflow/rseqc/rseqc_readduplication/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -223,7 +223,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rseqc/rseqc_readduplication/main.nf b/target/nextflow/rseqc/rseqc_readduplication/main.nf index 96c107e..4651d50 100644 --- a/target/nextflow/rseqc/rseqc_readduplication/main.nf +++ b/target/nextflow/rseqc/rseqc_readduplication/main.nf @@ -2949,7 +2949,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" 
: "main" }, { @@ -3071,8 +3071,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_readduplication", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3089,7 +3089,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml index 860abab..4e2dbcd 100644 --- a/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml +++ b/target/nextflow/rseqc/rseqc_tin/.config.vsh.yaml @@ -117,7 +117,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -214,8 +214,8 @@ build_info: output: "target/nextflow/rseqc/rseqc_tin" executable: "target/nextflow/rseqc/rseqc_tin/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -226,7 +226,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/rseqc/rseqc_tin/main.nf b/target/nextflow/rseqc/rseqc_tin/main.nf index 5a81ee7..ff2bc66 100644 --- a/target/nextflow/rseqc/rseqc_tin/main.nf +++ 
b/target/nextflow/rseqc/rseqc_tin/main.nf @@ -2957,7 +2957,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3076,8 +3076,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/rseqc/rseqc_tin", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3094,7 +3094,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/sortmerna/.config.vsh.yaml b/target/nextflow/sortmerna/.config.vsh.yaml index 0707e6b..8ece88b 100644 --- a/target/nextflow/sortmerna/.config.vsh.yaml +++ b/target/nextflow/sortmerna/.config.vsh.yaml @@ -103,7 +103,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -192,8 +192,8 @@ build_info: output: "target/nextflow/sortmerna" executable: "target/nextflow/sortmerna/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -204,7 +204,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/sortmerna/main.nf b/target/nextflow/sortmerna/main.nf index 
78712bf..5e2d5c9 100644 --- a/target/nextflow/sortmerna/main.nf +++ b/target/nextflow/sortmerna/main.nf @@ -2938,7 +2938,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3042,8 +3042,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/sortmerna", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3060,7 +3060,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/stringtie/.config.vsh.yaml b/target/nextflow/stringtie/.config.vsh.yaml index 68bc0e4..0fd7807 100644 --- a/target/nextflow/stringtie/.config.vsh.yaml +++ b/target/nextflow/stringtie/.config.vsh.yaml @@ -120,7 +120,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -216,8 +216,8 @@ build_info: output: "target/nextflow/stringtie" executable: "target/nextflow/stringtie/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -228,7 +228,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git 
a/target/nextflow/stringtie/main.nf b/target/nextflow/stringtie/main.nf index 23122ea..90d35f8 100644 --- a/target/nextflow/stringtie/main.nf +++ b/target/nextflow/stringtie/main.nf @@ -2961,7 +2961,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3073,8 +3073,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/stringtie", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3091,7 +3091,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/summarizedexperiment/.config.vsh.yaml b/target/nextflow/summarizedexperiment/.config.vsh.yaml index d28c04b..9be19e8 100644 --- a/target/nextflow/summarizedexperiment/.config.vsh.yaml +++ b/target/nextflow/summarizedexperiment/.config.vsh.yaml @@ -99,7 +99,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -199,8 +199,8 @@ build_info: output: "target/nextflow/summarizedexperiment" executable: "target/nextflow/summarizedexperiment/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -211,7 +211,7 @@ package_config: repositories: 
- type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/summarizedexperiment/main.nf b/target/nextflow/summarizedexperiment/main.nf index d92bfb7..d563ea2 100644 --- a/target/nextflow/summarizedexperiment/main.nf +++ b/target/nextflow/summarizedexperiment/main.nf @@ -2932,7 +2932,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3054,8 +3054,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/summarizedexperiment", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3072,7 +3072,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/tx2gene/.config.vsh.yaml b/target/nextflow/tx2gene/.config.vsh.yaml index fd94e72..4c1c4ad 100644 --- a/target/nextflow/tx2gene/.config.vsh.yaml +++ b/target/nextflow/tx2gene/.config.vsh.yaml @@ -87,7 +87,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -185,8 +185,8 @@ build_info: output: "target/nextflow/tx2gene" executable: "target/nextflow/tx2gene/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" 
package_config: name: "rnaseq" version: "main" @@ -197,7 +197,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/tx2gene/main.nf b/target/nextflow/tx2gene/main.nf index 7092bcc..abf7cb0 100644 --- a/target/nextflow/tx2gene/main.nf +++ b/target/nextflow/tx2gene/main.nf @@ -2924,7 +2924,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3043,8 +3043,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/tx2gene", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3061,7 +3061,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/tximport/.config.vsh.yaml b/target/nextflow/tximport/.config.vsh.yaml index aeea5b1..089999f 100644 --- a/target/nextflow/tximport/.config.vsh.yaml +++ b/target/nextflow/tximport/.config.vsh.yaml @@ -146,7 +146,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -251,8 +251,8 @@ build_info: output: "target/nextflow/tximport" executable: "target/nextflow/tximport/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: 
"https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -263,7 +263,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/tximport/main.nf b/target/nextflow/tximport/main.nf index e69d0a3..2eea968 100644 --- a/target/nextflow/tximport/main.nf +++ b/target/nextflow/tximport/main.nf @@ -2993,7 +2993,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3121,8 +3121,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/tximport", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3139,7 +3139,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/tximport/tximport.r b/target/nextflow/tximport/tximport.r index c47f8e6..5036399 100755 --- a/target/nextflow/tximport/tximport.r +++ b/target/nextflow/tximport/tximport.r @@ -137,5 +137,6 @@ if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) done <- lapply(params, write_se_table) # Output session information and citations -citation("tximeta") +# Removed for now because the 'tximeta' package is not found sometimes +# citation("tximeta") sessionInfo() \ No newline at end of file diff --git a/target/nextflow/ucsc/bedclip/.config.vsh.yaml b/target/nextflow/ucsc/bedclip/.config.vsh.yaml index e7ee1c8..7d9e875 100644 --- 
a/target/nextflow/ucsc/bedclip/.config.vsh.yaml +++ b/target/nextflow/ucsc/bedclip/.config.vsh.yaml @@ -65,7 +65,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -164,8 +164,8 @@ build_info: output: "target/nextflow/ucsc/bedclip" executable: "target/nextflow/ucsc/bedclip/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -176,7 +176,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/ucsc/bedclip/main.nf b/target/nextflow/ucsc/bedclip/main.nf index ae37fe3..aeda798 100644 --- a/target/nextflow/ucsc/bedclip/main.nf +++ b/target/nextflow/ucsc/bedclip/main.nf @@ -2898,7 +2898,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3018,8 +3018,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/ucsc/bedclip", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3036,7 +3036,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git 
a/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml index da8fffe..5fe8812 100644 --- a/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml +++ b/target/nextflow/ucsc/bedgraphtobigwig/.config.vsh.yaml @@ -65,7 +65,7 @@ requirements: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -164,8 +164,8 @@ build_info: output: "target/nextflow/ucsc/bedgraphtobigwig" executable: "target/nextflow/ucsc/bedgraphtobigwig/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" package_config: name: "rnaseq" version: "main" @@ -176,7 +176,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/ucsc/bedgraphtobigwig/main.nf b/target/nextflow/ucsc/bedgraphtobigwig/main.nf index 9e15629..9538074 100644 --- a/target/nextflow/ucsc/bedgraphtobigwig/main.nf +++ b/target/nextflow/ucsc/bedgraphtobigwig/main.nf @@ -2898,7 +2898,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3018,8 +3018,8 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/ucsc/bedgraphtobigwig", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" 
}, "package_config" : { "name" : "rnaseq", @@ -3036,7 +3036,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/umitools/umitools_dedup/main.nf b/target/nextflow/umitools/umitools_dedup/main.nf deleted file mode 100644 index a2b0f9c..0000000 --- a/target/nextflow/umitools/umitools_dedup/main.nf +++ /dev/null @@ -1,3637 +0,0 @@ -// umitools_dedup main -// -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' -class UnexpectedArgumentTypeException extends Exception { - String errorIdentifier - String stage - String plainName - String expectedClass - String foundClass - - // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} - UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { - super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + - "Expected type: ${expectedClass}. Found type: ${foundClass}") - this.errorIdentifier = errorIdentifier - this.stage = stage - this.plainName = plainName - this.expectedClass = expectedClass - this.foundClass = foundClass - } -} - -/** - * Checks if the given value is of the expected type. If not, an exception is thrown. 
- * - * @param stage The stage of the argument (input or output) - * @param par The parameter definition - * @param value The value to check - * @param errorIdentifier The identifier to use in the error message - * @return The value, if it is of the expected type - * @throws UnexpectedArgumentTypeException If the value is not of the expected type -*/ -def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { - // expectedClass will only be != null if value is not of the expected type - def expectedClass = null - def foundClass = null - - // todo: split if need be - - if (!par.required && value == null) { - expectedClass = null - } else if (par.multiple) { - if (value !instanceof Collection) { - value = [value] - } - - // split strings - value = value.collectMany{ val -> - if (val instanceof String) { - // collect() to ensure that the result is a List and not simply an array - val.split(par.multiple_sep).collect() - } else { - [val] - } - } - - // process globs - if (par.type == "file" && par.direction == "input") { - value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() - } - - // check types of elements in list - try { - value = value.collect { listVal -> - _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) - } - } catch (UnexpectedArgumentTypeException e) { - expectedClass = "List[${e.expectedClass}]" - foundClass = "List[${e.foundClass}]" - } - } else if (par.type == "string") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? null : "String" - } else if (par.type == "integer") { - // cast to integer if need be - if (value instanceof String) { - try { - value = value.toInteger() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" - } else if (par.type == "long") { - // cast to long if need be - if (value instanceof String) { - try { - value = value.toLong() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" - } else if (par.type == "double") { - // cast to double if need be - if (value instanceof String) { - try { - value = value.toDouble() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() - } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" - } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { - // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false - } - } - expectedClass = value instanceof Boolean ? null : "Boolean" - } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { - // cast to path if need be - if (value instanceof String) { - value = file(value, hidden: true) - } - if (value instanceof File) { - value = value.toPath() - } - expectedClass = value instanceof Path ? null : "Path" - } else if (par.type == "file" && stage == "input" && par.direction == "output") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? 
null : "String" - } else { - // didn't find a match for par.type - expectedClass = par.type - } - - if (expectedClass != null) { - if (foundClass == null) { - foundClass = value.getClass().getName() - } - throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) - } - - return value -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' -Map _processInputValues(Map inputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.required) { - assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" - } - } - - inputs = inputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" - - value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") - - [ name, value ] - } - } - return inputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - - outputs = outputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && it.direction == "output" } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" - - value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") - - [ name, value ] 
- } - } - return outputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' -class IDChecker { - final def items = [] as Set - - @groovy.transform.WithWriteLock - boolean observe(String item) { - if (items.contains(item)) { - return false - } else { - items << item - return true - } - } - - @groovy.transform.WithReadLock - boolean contains(String item) { - return items.contains(item) - } - - @groovy.transform.WithReadLock - Set getItems() { - return items.clone() - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. - */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' - -// helper functions for reading params from file // -def _getChild(parent, child) { - if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' -/** - * Figure out the param list format based on the file extension - * - * @param param_list A String containing the path to the parameter list file. - * - * @return A String containing the format of the parameter list file. 
- */ -def _paramListGuessFormat(param_list) { - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } -} - - -/** - * Read the param list - * - * @param param_list One of the following: - * - A String containing the path to the parameter list file (csv, json or yaml), - * - A yaml blob of a list of maps (yaml_blob), - * - Or a groovy list of maps (asis). - * @param config A Map of the Viash configuration. - * - * @return A List of Maps containing the parameters. - */ -def _parseParamList(param_list, Map config) { - // first determine format by extension - def paramListFormat = _paramListGuessFormat(param_list) - - def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? - file(param_list, hidden: true) : - null - - // get the correct parser function for the detected params_list format - def paramSets = [] - if (paramListFormat == "asis") { - paramSets = param_list - } else if (paramListFormat == "yaml_blob") { - paramSets = readYamlBlob(param_list) - } else if (paramListFormat == "yaml") { - paramSets = readYaml(paramListPath) - } else if (paramListFormat == "json") { - paramSets = readJson(paramListPath) - } else if (paramListFormat == "csv") { - paramSets = readCsv(paramListPath) - } else { - error "Format of provided --param_list not recognised.\n" + - "Found: '$paramListFormat'.\n" + - "Expected: a csv file, a json file, a yaml file,\n" + - "a yaml blob or a groovy list of maps." 
- } - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // id is argument - def idIsArgument = config.allArguments.any{it.plainName == "id"} - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ data -> - def id = data.id - if (!idIsArgument) { - data = data.findAll{k, v -> k != "id"} - } - [id, data] - }) - - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, data -> - data = _splitParams(data, config) - [id, data] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListPath) { - paramSets = paramSets.collect({ id, data -> - def new_data = data.collectEntries{ parName, parValue -> - def par = config.allArguments.find{it.plainName == parName} - if (par && par.type == "file" && par.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collectMany{path -> - def x = _resolveSiblingIfNotAbsolute(path, paramListPath) - x instanceof Collection ? x : [x] - } - } else { - parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) - } - } - [parName, parValue] - } - [id, new_data] - }) - } - - return paramSets -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. 
- */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. 
A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. - * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - // todo: fetch key from run args - def key_ = config.name - - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - - /* process params_list arguments */ - /*********************************/ - def paramList = params.containsKey("param_list") && params.param_list != null ? 
- params.param_list : [] - // if (paramList instanceof String) { - // paramList = [paramList] - // } - // def paramSets = paramList.collectMany{ _parseParamList(it, config) } - // TODO: be able to process param_list when it is a list of strings - def paramSets = _parseParamList(paramList, config) - if (paramSets.isEmpty()) { - paramSets = [[null, [:]]] - } - - /* combine arguments into channel */ - /**********************************/ - def processedParams = paramSets.indexed().collect{ index, tup -> - // Process ID - def id = tup[0] ?: globalID - - if (workflow.stubRun && !id) { - // if stub run, explicitly add an id if missing - id = "stub${index}" - } - assert id != null: "Each parameter set should have at least an 'id'" - - // Process params - def parValues = globalParams + tup[1] - // // Remove parameters which are null, if the default is also null - // parValues = parValues.collectEntries{paramName, paramValue -> - // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - // if ( paramValue != null || parameterSettings.get("default", null) != null ) { - // [paramName, paramValue] - // } - // } - parValues = parValues.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" - - if (par == null) { - return [:] - } - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - - [ name, value ] - } - - [id, parValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. 
Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - def processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' -def checkUniqueIds(Map args) { - def stopOnError = args.stopOnError == null ? args.stopOnError : true - - def idChecker = new IDChecker() - - return filter { tup -> - if (!idChecker.observe(tup[0])) { - if (stopOnError) { - error "Duplicate id: ${tup[0]}" - } else { - log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" - return false - } - } - return true - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' -// This helper file will be deprecated soon -preprocessInputsDeprecationWarningPrinted = false - -def preprocessInputsDeprecationWarning() { - if (!preprocessInputsDeprecationWarningPrinted) { - preprocessInputsDeprecationWarningPrinted = true - System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") - } -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). 
- * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - preprocessInputsDeprecationWarning() - - def config = args.config - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - def key_ = args.key ?: config.name - - // Get different parameter types (used throughout this function) - def defaultArgs = config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - map { tup -> - def id = tup[0] - def data = tup[1] - def passthrough = tup.drop(2) - - def new_data = (defaultArgs + data).collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - - if (par != null) { - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - } - - [ name, value ] - } - - [ id, new_data ] + passthrough - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' -/** - * Run a list of components on a stream of data. 
- * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component config. - * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component config. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component config. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component config. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runComponents(Map args) { - log.warn("runComponents is deprecated, use runEach instead") - assert args.components: "runComponents should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runComponents" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def id_ = args.id - - workflow runComponentsWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def comp_config = comp_.config - - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_config) - } - : input_ch - def id_ch = id_ - ? 
filter_ch | map{tup -> - // def new_id = id_(tup[0], tup[1], comp_config) - def new_id = tup[0] - if (id_ instanceof String) { - new_id = id_ - } else if (id_ instanceof Closure) { - new_id = id_(new_id, tup[1], comp_config) - } - [new_id] + tup.drop(1) - } - : filter_ch - def data_ch = id_ch | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_config) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_config) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - post_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runComponentsWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' -/** - * Run a list of components on a stream of data. - * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component itself. 
- * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component itself. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component itself. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component itself. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runEach(Map args) { - assert args.components: "runEach should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runEach" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def runIf_ = args.runIf - def id_ = args.id - - assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." - - workflow runEachWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_) - } - : input_ch - def id_ch = id_ - ? filter_ch | map{tup -> - def new_id = id_ - if (new_id instanceof Closure) { - new_id = new_id(tup[0], tup[1], comp_) - } - assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. 
Found: ${new_id.getClass()}" - [new_id] + tup.drop(1) - } - : filter_ch - def chPassthrough = null - def chRun = null - if (runIf_) { - def idRunIfBranch = id_ch.branch{ tup -> - run: runIf_(tup[0], tup[1], comp_) - passthrough: true - } - chPassthrough = idRunIfBranch.passthrough - chRun = idRunIfBranch.run - } else { - chRun = id_ch - chPassthrough = Channel.empty() - } - def data_ch = chRun | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - def return_ch = post_ch - | concat(chPassthrough) - - return_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runEachWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' -/** - * Join sourceChannel to targetChannel - * - * This function joins the sourceChannel to the targetChannel. - * However, each id in the targetChannel must be present in the - * sourceChannel. 
If _meta.join_id exists in the targetChannel, that is - * used as an id instead. If the id doesn't match any id in the sourceChannel, - * an error is thrown. - */ - -def safeJoin(targetChannel, sourceChannel, key) { - def sourceIDs = new IDChecker() - - def sourceCheck = sourceChannel - | map { tup -> - sourceIDs.observe(tup[0]) - tup - } - def targetCheck = targetChannel - | map { tup -> - def id = tup[0] - - if (!sourceIDs.contains(id)) { - error ( - "Error in module '${key}' when merging output with original state.\n" + - " Reason: output with id '${id}' could not be joined with source channel.\n" + - " If the IDs in the output channel differ from the input channel,\n" + - " please set `tup[1]._meta.join_id to the original ID.\n" + - " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + - " Unexpected ID in the output channel: '${id}'.\n" + - " Example input event: [\"id\", [input: file(...)]],\n" + - " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" - ) - } - // TODO: add link to our documentation on how to fix this - - tup - } - - sourceCheck.cross(targetChannel) - | map{ left, right -> - right + left.drop(1) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' -def _processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - // add default values to output files which haven't already got a default - if (arg.type == "file" && arg.direction == "output" && arg.default == null) { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - if (arg.multiple) { - arg.default = [arg.default] - } - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' -def addGlobalArguments(config) { - def localConfig = [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ] - // TODO: allow multiple: true in param_list? - // TODO: allow to specify a --param_list_regex to filter the param_list? - // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? - ] - ] - ] - ] - - return processConfig(_mergeMap(config, localConfig)) -} - -def _mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = _mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' -def _generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if 
(param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def _generateHelp(config) { - def fun = config - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? 
- "" : - "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -// based on Format._paragraphWrap -def _paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def helpMessage(config) { - if (params.containsKey("help") && params.help) { - def mergedConfig = addGlobalArguments(config) - def helpStr = _generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' -def processConfig(config) { - // set defaults for arguments - config.arguments = - (config.arguments ?: []).collect{_processArgument(it)} - - // set defaults for argument_group arguments - config.argument_groups = - (config.argument_groups ?: []).collect{grp -> - grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} - grp - } - - // create combined arguments list - config.allArguments = - config.arguments + - config.argument_groups.collectMany{it.arguments} - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.argument_groups - if (argGroups.any{it.name.toLowerCase() == "arguments"}) { - argGroups = argGroups.collect{ grp -> - if (grp.name.toLowerCase() == 
"arguments") { - grp = grp + [ - arguments: grp.arguments + config.arguments - ] - } - grp - } - } else { - argGroups = argGroups + [ - name: "Arguments", - arguments: config.arguments - ] - } - config.allArgumentGroups = argGroups - - config -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' - -def readConfig(file) { - def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) - processConfig(config) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' -/** - * Resolve a path relative to the current file. - * - * @param str The path to resolve, as a String. - * @param parentPath The path to resolve relative to, as a Path. - * - * @return The path that may have been resovled, as a Path. - */ -def _resolveSiblingIfNotAbsolute(str, parentPath) { - if (str !instanceof String) { - return str - } - if (!_stringIsAbsolutePath(str)) { - return parentPath.resolveSibling(str) - } else { - return file(str, hidden: true) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' -/** - * Check whether a path as a string is absolute. - * - * In the past, we tried using `file(., relative: true).isAbsolute()`, - * but the 'relative' option was added in 22.10.0. - * - * @param path The path to check, as a String. - * - * @return Whether the path is absolute, as a boolean. 
- */ -def _stringIsAbsolutePath(path) { - def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ - - assert path instanceof String - return _resolve_URL_PROTOCOL.matcher(path).matches() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' -class CustomTraceObserver implements nextflow.trace.TraceObserver { - List traces - - CustomTraceObserver(List traces) { - this.traces = traces - } - - @Override - void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } - - @Override - void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } -} - -def collectTraces() { - def traces = Collections.synchronizedList([]) - - // add custom trace observer which stores traces in the traces object - session.observers.add(new CustomTraceObserver(traces)) - - traces -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' -/** - * Performs a deep clone of the given object. - * @param x an object - */ -def deepClone(x) { - iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' -def getPublishDir() { - return params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? 
params.publishDir : - null -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' - -// Recurse upwards until we find a '.build.yaml' file -def _findBuildYamlFile(pathPossiblySymlink) { - def path = pathPossiblySymlink.toRealPath() - def child = path.resolve(".build.yaml") - if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { - return child - } else { - def parent = path.getParent() - if (parent == null) { - return null - } else { - return _findBuildYamlFile(parent) - } - } -} - -// get the root of the target folder -def getRootDir() { - def dir = _findBuildYamlFile(meta.resources_dir) - assert dir != null: "Could not find .build.yaml in the folder structure" - dir.getParent() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' -/** - * Recursively apply a function over the leaves of an object. - * @param obj The object to iterate over. - * @param fun The function to apply to each value. - * @return The object with the function applied to each value. - */ -def iterateMap(obj, fun) { - if (obj instanceof List && obj !instanceof String) { - return obj.collect{item -> - iterateMap(item, fun) - } - } else if (obj instanceof Map) { - return obj.collectEntries{key, item -> - [key.toString(), iterateMap(item, fun)] - } - } else { - return fun(obj) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' -/** - * A view for printing the event of each channel as a YAML blob. - * This is useful for debugging. - */ -def niceView() { - workflow niceViewWf { - take: input - main: - output = input - | view{toYamlBlob(it)} - emit: output - } - return niceViewWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') - - def br = java.nio.file.Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - def m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parse(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' -def readJsonBlob(str) { - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parseText(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' -// Custom constructor to modify how certain objects are parsed from YAML -class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { - Path root - - class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { - public Object construct(org.yaml.snakeyaml.nodes.Node node) { - String filename = (String) constructScalar(node); - if (root != null) { - return root.resolve(filename); - } - return java.nio.file.Paths.get(filename); - } - } - - CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { - super(options) - this.root = root - // Handling !file tag and parse it back to a File type - this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) - } -} - -def readTaggedYaml(Path path) { - def options = new org.yaml.snakeyaml.LoaderOptions() - def constructor = new CustomConstructor(options, path.getParent()) - def yaml = new org.yaml.snakeyaml.Yaml(constructor) - return yaml.load(path.text) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' -def readYamlBlob(str) { - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' -String toJsonBlob(data) { - return groovy.json.JsonOutput.toJson(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' -// Custom representer to modify how certain objects are represented in YAML -class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { - Path relativizer - - class RepresentPath implements org.yaml.snakeyaml.representer.Represent { - public String getFileName(Object obj) { - if (obj instanceof File) { - obj = ((File) obj).toPath(); - } - if (obj !instanceof Path) { - throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); - } - def path = (Path) obj; - - if (relativizer != null) { - return relativizer.relativize(path).toString() - } else { - return path.toString() - } - } - - public org.yaml.snakeyaml.nodes.Node representData(Object data) { - String filename = getFileName(data); - def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); - return representScalar(tag, filename); - } - } - CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { - super(options) - this.relativizer = relativizer - this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) - this.representers.put(Path, new RepresentPath()) - this.representers.put(File, new RepresentPath()) - } -} - -String toTaggedYamlBlob(data) { - return toRelativeTaggedYamlBlob(data, null) -} -String toRelativeTaggedYamlBlob(data, Path relativizer) { - def options = new org.yaml.snakeyaml.DumperOptions() - 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - def representer = new CustomRepresenter(options, relativizer) - def yaml = new org.yaml.snakeyaml.Yaml(representer, options) - return yaml.dump(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' -String toYamlBlob(data) { - def options = new org.yaml.snakeyaml.DumperOptions() - options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - options.setPrettyFlow(true) - def yaml = new org.yaml.snakeyaml.Yaml(options) - def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) - return yaml.dump(cleanData) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' -void writeJson(data, file) { - assert data: "writeJson: data should not be null" - assert file: "writeJson: file should not be null" - file.write(toJsonBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' -void writeYaml(data, file) { - assert data: "writeYaml: data should not be null" - assert file: "writeYaml: file should not be null" - file.write(toYamlBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' -def findStates(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - 
required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - 
return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' -def joinStates(Closure apply_) { - workflow joinStatesWf { - take: input_ch - main: - output_ch = input_ch - | toSortedList - | filter{ it.size() > 0 } - | map{ tups -> - def ids = tups.collect{it[0]} - def states = tups.collect{it[1]} - apply_(ids, states) - } - - emit: output_ch - } - return joinStatesWf -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' -def collectFiles(obj) { - if (obj instanceof java.io.File || obj instanceof Path) { - return [obj] - } else if (obj instanceof List && obj !instanceof String) { - return obj.collectMany{item -> - collectFiles(item) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectFiles(item) - } - } else { - return [] - } -} - -/** - * Recurse through a state and collect all input files and their target output filenames. - * @param obj The state to recurse through. - * @param prefix The prefix to prepend to the output filenames. - */ -def collectInputOutputPaths(obj, prefix) { - if (obj instanceof File || obj instanceof Path) { - def path = obj instanceof Path ? obj : obj.toPath() - def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" - def newFilename = prefix + ext - return [[obj, newFilename]] - } else if (obj instanceof List && obj !instanceof String) { - return obj.withIndex().collectMany{item, ix -> - collectInputOutputPaths(item, prefix + "_" + ix) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectInputOutputPaths(item, prefix + "." 
+ key) - } - } else { - return [] - } -} - -def publishStates(Map args) { - def key_ = args.get("key") - def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) - - assert key_ != null : "publishStates: key must be specified" - - workflow publishStatesWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] - - // the input files and the target output filenames - def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] - - def yamlFilename = yamlTemplate_ - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - - // TODO: do the pathnames in state_ match up with the outputFilenames_? - - // convert state to yaml blob - def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] - } - | publishStatesProc - emit: input_ch - } - return publishStatesWf -} -process publishStatesProc { - // todo: check publishpath? - publishDir path: "${getPublishDir()}/", mode: "copy" - tag "$id" - input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) - output: - tuple val(id), path{[yamlFile] + outputFiles} - script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? 
outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } - """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" -} - - -// this assumes that the state contains no other values other than those specified in the config -def publishStatesByConfig(Map args) { - def config = args.get("config") - assert config != null : "publishStatesByConfig: config must be specified" - - def key_ = args.get("key", config.name) - assert key_ != null : "publishStatesByConfig: key must be specified" - - workflow publishStatesSimpleWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] - def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] - - // TODO: allow overriding the state.yaml template - // TODO TODO: if auto.publish == "state", add output_state as an argument - def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' - def yamlFilename = yamlTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where - // - key is a String - // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] - // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) - def processedState = - config.allArguments - .findAll { it.direction == "output" } - .collectMany { par -> - def plainName_ = par.plainName - // if the state does not contain the key, it's an - // optional argument for which the component did - // not generate any output - if (!state_.containsKey(plainName_)) { - return [] - } - def value = state_[plainName_] - // if the parameter is not a file, it should be stored - // in the state as-is, but is not something that needs - // to be copied from the source path to the dest path - if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] - } - // if the orig state does not contain this filename, - // it's an optional argument for which the user specified - // that it should not be returned as a state - if (!origState_.containsKey(plainName_)) { - return [] - } - def filenameTemplate = origState_[plainName_] - // if the pararameter is multiple: true, fetch the template - if (par.multiple && filenameTemplate instanceof List) { - filenameTemplate = filenameTemplate[0] - } - // instantiate the template - def filename = filenameTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - 
.replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - if (par.multiple) { - // if the parameter is multiple: true, the filename - // should contain a wildcard '*' that is replaced with - // the index of the file - assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" - def outputPerFile = value.withIndex().collect{ val, ix -> - def filename_ix = filename.replace("*", ix.toString()) - def value_ = java.nio.file.Paths.get(filename_ix) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] - } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] - } else { - def value_ = java.nio.file.Paths.get(filename) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] - } - } - - def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} - - // convert state to yaml blob - def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] - } - | publishStatesProc - emit: input_ch - } - return publishStatesSimpleWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' -def setState(fun) { - assert fun instanceof Closure || fun instanceof Map || fun instanceof List : - "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" - - // if fun is a List, convert to map - if (fun instanceof List) { - // check whether fun is a list[string] - assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" - fun = fun.collectEntries{[it, it]} - } - - // if fun is a map, convert to closure - if (fun instanceof Map) { - // check whether fun is a map[string, string] - assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" - assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" - def funMap = fun.clone() - // turn the map into a closure to be used later on - fun = { id_, state_ -> - assert state_ instanceof Map : "Error in setState: the state is not a Map" - funMap.collectMany{newkey, origkey -> - if (state_.containsKey(origkey)) { - [[newkey, state_[origkey]]] - } else { - [] - } - }.collectEntries() - } - } - - map { tup -> - def id = tup[0] - def state = tup[1] - def unfilteredState = fun(id, state) - def newState = unfilteredState.findAll{key, val -> val != null} - [id, newState] + tup.drop(2) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - def unexpectedKeys = auto.keySet() - expectedKeys - assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" - - // check auto.simplifyInput - assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" - - // check auto.simplifyOutput - assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" - - // check auto.transcript - assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" - - // check auto.publish - assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" - - return auto.subMap(expectedKeys) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = [ - "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" - ] - def unexpectedKeys = drctv.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source 
/cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? 
":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE 
maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
- } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' -def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { - // override defaults with args - def workflowArgs = defaultWfArgs + args - - // check whether 'key' exists - assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (workflowArgs["key"] instanceof Closure) { - workflowArgs["key"] = workflowArgs["key"](meta.config.name) - } - def key = workflowArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check for any unexpected keys - def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] - def unexpectedKeys = workflowArgs.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" - workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" - workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) - - // auto define publish, if so desired - if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = getPublishDir() - - if (publishDir != null) { - workflowArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (workflowArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? 
params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] - workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { - assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" - } - } - - // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? - for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { - log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
- } - } - - // check fromState - workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) - - // check toState - workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) - - // return output - return workflowArgs -} - -def _processFromState(fromState, key_, config_) { - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState == null) { - return null - } - - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def data = fromStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (state.containsKey(origkey)) { - [[newkey, state[origkey]]] - } else if (!requiredInputNames.contains(origkey)) { - [] - } else { - throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") - } - }.collectEntries() - data - } - } - - return fromState -} - -def _processToState(toState, key_, config_) { - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key_': the output is not a Map" - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def extraEntries = toStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (output.containsKey(origkey)) { - [[newkey, output[origkey]]] - } else if (!requiredOutputNames.contains(origkey)) { - [] - } else { - throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") - } - }.collectEntries() - state + extraEntries - } - } - - return toState -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' -def _debug(workflowArgs, debugKey) { - if (workflowArgs.debug) { - view { "process '${workflowArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -// depends on: innerWorkflowFactory -def workflowFactory(Map args, Map defaultWfArgs, Map meta) { - def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) - def key_ = workflowArgs["key"] - - workflow workflowInstance { - take: input_ - - main: - def chModified = input_ - | checkUniqueIds([:]) - | _debug(workflowArgs, "input") - | map { tuple -> - tuple = deepClone(tuple) - - if (workflowArgs.map) { - tuple = workflowArgs.map(tuple) - } - if (workflowArgs.mapId) { - tuple[0] = workflowArgs.mapId(tuple[0]) - } - if (workflowArgs.mapData) { - tuple[1] = workflowArgs.mapData(tuple[1]) - } - if (workflowArgs.mapPassthrough) { - tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - if (tuple[0] instanceof GString) { - tuple[0] = tuple[0].toString() - } - assert tuple[0] instanceof CharSequence : - "Error in module '${key_}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key_}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (workflowArgs.renameKeys) { - assert workflowArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - workflowArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - - def chRun = null - def chPassthrough = null - if (workflowArgs.runIf) { - def runIfBranch = chModified.branch{ tup -> - run: workflowArgs.runIf(tup[0], tup[1]) - passthrough: true - } - chRun = runIfBranch.run - chPassthrough = runIfBranch.passthrough - } else { - chRun = chModified - chPassthrough = Channel.empty() - } - - def chRunFiltered = workflowArgs.filter ? - chRun | filter{workflowArgs.filter(it)} : - chRun - - def chArgs = workflowArgs.fromState ? 
- chRunFiltered | map{ - def new_data = workflowArgs.fromState(it.take(2)) - [it[0], new_data] - } : - chRunFiltered | map {tup -> tup.take(2)} - - // fill in defaults - def chArgsWithDefaults = chArgs - | map { tuple -> - def id_ = tuple[0] - def data_ = tuple[1] - - // TODO: could move fromState to here - - // fetch default params from functionality - def defaultArgs = meta.config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = meta.config.allArguments - .findAll { par -> - def argKey = key_ + "__" + par.plainName - params.containsKey(argKey) - } - .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = meta.config.allArguments - .findAll { data_.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data_[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs - .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} - - combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) - - [id_, combinedArgs] + tuple.drop(2) - } - - // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults - | _debug(workflowArgs, "processed") - // run workflow - | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> - - // see if output map contains metadata - def meta_ = - output_ instanceof Map && output_.containsKey("_meta") ? 
- output_["_meta"] : - [:] - def join_id = meta_.join_id ?: id_ - - // remove metadata - output_ = output_.findAll{k, v -> k != "_meta"} - - // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) - - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] - } - // | view{"chInitialOutput: ${it.take(3)}"} - - // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) - // input tuple format: [join_id, id, output, prev_state, ...] - // output tuple format: [join_id, id, new_state, ...] - | map{ tup -> - def new_state = workflowArgs.toState(tup.drop(1).take(3)) - tup.take(2) + [new_state] + tup.drop(4) - } - - if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [join_id, id, new_state] - | map{ tup -> - tup.take(3) - } - - safeJoin(chPublish, chArgsWithDefaults, key_) - // input tuple format: [join_id, id, new_state, orig_state, ...] - // output tuple format: [id, new_state, orig_state] - | map { tup -> - tup.drop(1).take(3) - } - | publishStatesByConfig(key: key_, config: meta.config) - } - - // remove join_id and meta - chReturn = chNewState - | map { tup -> - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [id, new_state, ...] 
- tup.drop(1) - } - | _debug(workflowArgs, "output") - | concat(chPassthrough) - - emit: chReturn - } - - def wf = workflowInstance.cloneWithName(key_) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs, workflowArgs, meta) - } - // add config to module for later introspection - wf.metaClass.config = meta.config - - return wf -} - -nextflow.enable.dsl=2 - -// START COMPONENT-SPECIFIC CODE - -// create meta object -meta = [ - "resources_dir": moduleDir.toRealPath().normalize(), - "config": processConfig(readJsonBlob('''{ - "name" : "umitools_dedup", - "namespace" : "umitools", - "version" : "main", - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "boolean", - "name" : "--paired", - "description" : "Paired fastq files or not?", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--bam", - "description" : "Input BAM file", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--bai", - "description" : "BAM index", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "boolean", - "name" : "--get_output_stats", - "description" : "Whether or not to generate output stats.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - } - ] - }, - { - "name" : "Output", - "arguments" : [ - { - "type" : "file", - "name" : "--output_bam", - "description" : "Deduplicated BAM file", - "default" : [ - "$id.$key.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--output_stats", - "description" : 
"Directory containing UMI based dedupllication statistics files", - "default" : [ - "$id.umi_dedup.stats" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - } - ] - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true - } - ], - "description" : "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n", - "test_resources" : [ - { - "type" : "bash_script", - "path" : "test.sh", - "is_executable" : true - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/chr19.bam" - }, - { - "type" : "file", - "path" : "/testData/unit_test_resources/chr19.bam.bai" - } - ], - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/nf-core/umitools/dedup/main.nf", - "modules/nf-core/umitools/dedup/meta.yml" - ], - "last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33" - } - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "runners" : [ - { - "type" : "executable", - "id" : "executable", - "docker_setup_strategy" : "ifneedbepullelsecachedbuild" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1000000000.B", - "mem2gb" : "memory = 2000000000.B", - "mem5gb" : "memory = 5000000000.B", - "mem10gb" : "memory = 10000000000.B", - "mem20gb" : "memory = 20000000000.B", - "mem50gb" : "memory = 50000000000.B", - "mem100gb" : "memory = 100000000000.B", - "mem200gb" : "memory = 200000000000.B", - 
"mem500gb" : "memory = 500000000000.B", - "mem1tb" : "memory = 1000000000000.B", - "mem2tb" : "memory = 2000000000000.B", - "mem5tb" : "memory = 5000000000000.B", - "mem10tb" : "memory = 10000000000000.B", - "mem20tb" : "memory = 20000000000000.B", - "mem50tb" : "memory = 50000000000000.B", - "mem100tb" : "memory = 100000000000000.B", - "mem200tb" : "memory = 200000000000000.B", - "mem500tb" : "memory = 500000000000000.B", - "mem1gib" : "memory = 1073741824.B", - "mem2gib" : "memory = 2147483648.B", - "mem4gib" : "memory = 4294967296.B", - "mem8gib" : "memory = 8589934592.B", - "mem16gib" : "memory = 17179869184.B", - "mem32gib" : "memory = 34359738368.B", - "mem64gib" : "memory = 68719476736.B", - "mem128gib" : "memory = 137438953472.B", - "mem256gib" : "memory = 274877906944.B", - "mem512gib" : "memory = 549755813888.B", - "mem1tib" : "memory = 1099511627776.B", - "mem2tib" : "memory = 2199023255552.B", - "mem4tib" : "memory = 4398046511104.B", - "mem8tib" : "memory = 8796093022208.B", - "mem16tib" : "memory = 17592186044416.B", - "mem32tib" : "memory = 35184372088832.B", - "mem64tib" : "memory = 70368744177664.B", - "mem128tib" : "memory = 140737488355328.B", - "mem256tib" : "memory = 281474976710656.B", - "mem512tib" : "memory = 562949953421312.B", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "engines" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_registry" : "images.viash-hub.com", - "target_tag" : "main", - "namespace_separator" : "/", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "pip" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "umi_tools" - ], - "upgrade" : true - } - ] - }, - { - "type" : 
"native", - "id" : "native" - } - ], - "build_info" : { - "config" : "/workdir/root/repo/src/umitools/umitools_dedup/config.vsh.yaml", - "runner" : "nextflow", - "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/umitools/umitools_dedup", - "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" - }, - "package_config" : { - "name" : "rnaseq", - "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", - "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", - ".engines += { type: \\"native\\" }", - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", - ".engines[.type == 'docker'].target_tag := 'main'" - ], - "organization" : "vsh" - } -}''')) -] - -// resolve dependencies dependencies (if any) - - -// inner workflow -// inner workflow hook -def innerWorkflowFactory(args) { - def rawScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi ) -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! 
-z ${VIASH_PAR_BAI+x} ]; then echo "${VIASH_PAR_BAI}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bai='&'#" ; else echo "# par_bai="; fi ) -$( if [ ! -z ${VIASH_PAR_GET_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_GET_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_get_output_stats='&'#" ; else echo "# par_get_output_stats="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_BAM+x} ]; then echo "${VIASH_PAR_OUTPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_bam='&'#" ; else echo "# par_output_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT_STATS+x} ]; then echo "${VIASH_PAR_OUTPUT_STATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_stats='&'#" ; else echo "# par_output_stats="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -args="--random-seed=100" - -if \\$par_paired; then - paired="--paired" - args+=" --unpaired-reads=discard --chimeric-pairs=discard" -else - paired="" -fi - -if \\$par_get_output_stats; then - mkdir -p \\$par_output_stats - stats="--output-stats \\$par_output_stats/" -else - stats="" -fi - -PYTHONHASHSEED=0 umi_tools dedup -I \\$par_bam -S \\$par_output_bam \\$stats \\$paired \\$args -VIASHMAIN -bash "$tempscript" -''' - - return vdsl3WorkflowFactory(args, meta, rawScript) -} - - - -/** - * Generate a workflow for VDSL3 modules. - * - * This function is called by the workflowFactory() function. - * - * Input channel: [id, input_map] - * Output channel: [id, output_map] - * - * Internally, this workflow will convert the input channel - * to a format which the Nextflow module will be able to handle. - */ -def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { - def key = args["key"] - def processObj = null - - workflow processWf { - take: input_ - main: - - if (processObj == null) { - processObj = _vdsl3ProcessFactory(args, meta, rawScript) - } - - output_ = input_ - | map { tuple -> - def id = tuple[0] - def data_ = tuple[1] - - if (workflow.stubRun) { - // add id if missing - data_ = [id: 'stub'] + data_ - } - - // process input files separately - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { par -> - def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = meta.config.allArguments - .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = data_[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val - .replaceAll('\\$id', id) - .replaceAll('\\$\\{id\\}', id) - .replaceAll('\\$key', key) - .replaceAll('\\$\\{key\\}', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] - } - | processObj - | map { output -> - def outputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - def out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - outputFiles.removeAll{it.value == null} - - [ output[0], outputFiles ] - } - emit: output_ - 
} - - return processWf -} - -// depends on: session? -def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { - // autodetect process key - def wfKey = workflowArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def scriptMeta = nextflow.script.ScriptMeta.current() - def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = workflowArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" 
&& it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } - .join() - - def outputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' + par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (workflowArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = meta.config.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! 
- def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" - |export VIASH_META_NAME="${meta.config.name}" - |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) - | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) - | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) - | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) - | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (workflowArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // write process to temp file - def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") - addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } - tempFile.text = procStr - - // create process from temp file - def binding = new nextflow.script.ScriptBinding([:]) - def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) - .setModule(true) - .setBinding(binding) - def moduleScript = parser.runScript(tempFile) - .getScript() - - // register module in meta - def module = new nextflow.script.IncludeDef.Module(name: procKey) - scriptMeta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return scriptMeta.getProcess(procKey) -} - -// defaults -meta["defaults"] = [ - // key to be used to trace the process and determine output names - key: null, - - // fixed arguments to be passed to script - args: [:], - - // default directives - directives: readJsonBlob('''{ - "container" : { - "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/umitools/umitools_dedup", - "tag" : "main" - }, - "tag" : "$id" -}'''), - - // auto settings - auto: readJsonBlob('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Choose whether or not to run the component on the tuple if the condition is true. - // Otherwise, the tuple will be passed through. 
- // Example: `{ tup -> tup[0] != "skip_this" }` - runIf: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// initialise default workflow -meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) - -// add workflow to environment -nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) - -// anonymous workflow for running this module as a standalone -workflow { - // add id argument if it's not already in the config - // TODO: deep copy - def newConfig = deepClone(meta.config) - def newParams = deepClone(params) - - def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} - if (!argsContainsId) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - newConfig.arguments.add(0, idArg) - newConfig = processConfig(newConfig) - } - if (!newParams.containsKey("id")) { - newParams.id = "run" - } - - helpMessage(newConfig) - - channelFromParams(newParams, newConfig) - // make sure id is not in the state if id is not in the args - | map {id, state -> - if (!argsContainsId) { - [id, state.findAll{k, v -> k != "id"}] - } else { - [id, state] - } - } - | meta.workflow.run( - auto: [ publish: "state" ] - ) -} - -// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/umitools/umitools_extract/.config.vsh.yaml b/target/nextflow/umitools/umitools_extract/.config.vsh.yaml deleted file mode 100644 index 6bec363..0000000 --- a/target/nextflow/umitools/umitools_extract/.config.vsh.yaml +++ /dev/null @@ -1,283 +0,0 @@ -name: "umitools_extract" -namespace: "umitools" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "boolean" - name: "--paired" - description: "Paired fastq files or not?" 
- info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--input" - description: "Input fastq files, either one or two (paired)" - info: null - example: - - "sample.fastq" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: true - multiple_sep: "," - - type: "string" - name: "--bc_pattern" - description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\ - \ first 6 nucleotides of the read are from the UMI." - info: null - required: false - direction: "input" - multiple: true - multiple_sep: "," -- name: "Output" - arguments: - - type: "file" - name: "--fastq_1" - description: "Output file for read 1." - info: null - default: - - "$id.$key.read_1.fastq" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--fastq_2" - description: "Output file for read 2." - info: null - default: - - "$id.$key.read_2.fastq" - must_exist: false - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -- name: "Optional arguments" - arguments: - - type: "string" - name: "--umitools_extract_method" - description: "UMI pattern to use." - info: null - default: - - "string" - required: false - choices: - - "string" - - "regex" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--umitools_umi_separator" - description: "The character that separates the UMI in the read name. Most likely\ - \ a colon if you skipped the extraction with UMI-tools and used other software." - info: null - default: - - "_" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--umitools_grouping_method" - description: "Method to use to determine read groups by subsuming those with similar\ - \ UMIs. 
All methods start by identifying the reads with the same mapping position,\ - \ but treat similar yet nonidentical UMIs differently." - info: null - default: - - "directional" - required: false - choices: - - "unique" - - "percentile" - - "cluster" - - "adjacency" - - "directional" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--umi_discard_read" - description: "After UMI barcode extraction discard either R1 or R2 by setting\ - \ this parameter to 1 or 2, respectively." - info: null - default: - - 0 - required: false - choices: - - 0 - - 1 - - 2 - direction: "input" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -description: "UMI-tools contains tools for dealing with Unique Molecular Identifiers\ - \ (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See\ - \ https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component\ - \ flexible removes UMI sequences from fastq reads. 
UMIs are removed and appended\ - \ to the read name.\nThis component extracts UMI barcode from a read and add it\ - \ to the read name, leaving any sample barcode in place\n" -test_resources: -- type: "bash_script" - path: "test.sh" - is_executable: true -- type: "file" - path: "scrb_seq_fastq.1.gz" -- type: "file" - path: "scrb_seq_fastq.2.gz" -- type: "file" - path: "slim.fastq.gz" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/nf-core/umitools/extract/main.nf" - - "modules/nf-core/umitools/extract/meta.yml" - last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" 
- mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/umitools/umitools_extract/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/umitools/umitools_extract" - executable: "target/nextflow/umitools/umitools_extract/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := 
['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/umitools/umitools_extract/nextflow.config b/target/nextflow/umitools/umitools_extract/nextflow.config deleted file mode 100644 index 95ab2f5..0000000 --- a/target/nextflow/umitools/umitools_extract/nextflow.config +++ /dev/null @@ -1,125 +0,0 @@ -manifest { - name = 'umitools/umitools_extract' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = 'main' - description = 'UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.\nThis component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place\n' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = 
true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1000000000.B } - withLabel: mem2gb { memory = 2000000000.B } - withLabel: mem5gb { memory = 5000000000.B } - withLabel: mem10gb { memory = 10000000000.B } - withLabel: mem20gb { memory = 20000000000.B } - withLabel: mem50gb { memory = 50000000000.B } - withLabel: mem100gb { memory = 100000000000.B } - withLabel: mem200gb { memory = 200000000000.B } - withLabel: mem500gb { memory = 500000000000.B } - withLabel: mem1tb { memory = 1000000000000.B } - withLabel: mem2tb { memory = 2000000000000.B } - withLabel: mem5tb { memory = 5000000000000.B } - withLabel: mem10tb { memory = 10000000000000.B } - withLabel: mem20tb { memory = 20000000000000.B } - withLabel: mem50tb { memory = 50000000000000.B } - withLabel: mem100tb { memory = 100000000000000.B } - withLabel: mem200tb { memory = 200000000000000.B } - withLabel: mem500tb { memory = 500000000000000.B } - withLabel: mem1gib { memory = 1073741824.B } - withLabel: mem2gib { memory = 2147483648.B } - withLabel: mem4gib { memory = 4294967296.B } - withLabel: mem8gib { memory = 8589934592.B } - withLabel: mem16gib { memory = 17179869184.B } - withLabel: mem32gib { memory = 34359738368.B } - withLabel: mem64gib { memory = 68719476736.B } - withLabel: mem128gib { memory = 137438953472.B } - withLabel: mem256gib { memory = 274877906944.B } - withLabel: mem512gib { memory = 549755813888.B } - withLabel: mem1tib { memory = 1099511627776.B } - withLabel: mem2tib { memory = 2199023255552.B } - withLabel: mem4tib { memory = 4398046511104.B } - withLabel: mem8tib { 
memory = 8796093022208.B } - withLabel: mem16tib { memory = 17592186044416.B } - withLabel: mem32tib { memory = 35184372088832.B } - withLabel: mem64tib { memory = 70368744177664.B } - withLabel: mem128tib { memory = 140737488355328.B } - withLabel: mem256tib { memory = 281474976710656.B } - withLabel: mem512tib { memory = 562949953421312.B } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/umitools/umitools_extract/nextflow_schema.json b/target/nextflow/umitools/umitools_extract/nextflow_schema.json deleted file mode 100644 index 5bc15e7..0000000 --- a/target/nextflow/umitools/umitools_extract/nextflow_schema.json +++ /dev/null @@ -1,191 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "umitools_extract", -"description": "UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.\nThis component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place\n", -"type": "object", -"definitions": { - - - - "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "paired": { - "type": - "boolean", - "description": "Type: `boolean`, default: `false`. Paired fastq files or not?", - "help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?" 
- , - "default":false - } - - - , - "input": { - "type": - "string", - "description": "Type: List of `file`, required, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)", - "help_text": "Type: List of `file`, required, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)" - - } - - - , - "bc_pattern": { - "type": - "string", - "description": "Type: List of `string`, multiple_sep: `\",\"`. The UMI barcode pattern to use e", - "help_text": "Type: List of `string`, multiple_sep: `\",\"`. The UMI barcode pattern to use e.g. \u0027NNNNNN\u0027 indicates that the first 6 nucleotides of the read are from the UMI." - - } - - -} -}, - - - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - - "fastq_1": { - "type": - "string", - "description": "Type: `file`, required, default: `$id.$key.fastq_1.fastq`. Output file for read 1", - "help_text": "Type: `file`, required, default: `$id.$key.fastq_1.fastq`. Output file for read 1." - , - "default":"$id.$key.fastq_1.fastq" - } - - - , - "fastq_2": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2", - "help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2." - , - "default":"$id.$key.fastq_2.fastq" - } - - -} -}, - - - "optional arguments" : { - "title": "Optional arguments", - "type": "object", - "description": "No description", - "properties": { - - - "umitools_extract_method": { - "type": - "string", - "description": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use", - "help_text": "Type: `string`, default: `string`, choices: ``string`, `regex``. UMI pattern to use.", - "enum": ["string", "regex"] - - , - "default":"string" - } - - - , - "umitools_umi_separator": { - "type": - "string", - "description": "Type: `string`, default: `_`. 
The character that separates the UMI in the read name", - "help_text": "Type: `string`, default: `_`. The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software." - , - "default":"_" - } - - - , - "umitools_grouping_method": { - "type": - "string", - "description": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs", - "help_text": "Type: `string`, default: `directional`, choices: ``unique`, `percentile`, `cluster`, `adjacency`, `directional``. Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.", - "enum": ["unique", "percentile", "cluster", "adjacency", "directional"] - - , - "default":"directional" - } - - - , - "umi_discard_read": { - "type": - "integer", - "description": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively", - "help_text": "Type: `integer`, default: `0`, choices: ``0`, `1`, `2``. After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.", - "enum": [0, 1, 2] - - , - "default":0 - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." 
- - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/output" - }, - - { - "$ref": "#/definitions/optional arguments" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/umitools_prepareforquant/.config.vsh.yaml b/target/nextflow/umitools_prepareforquant/.config.vsh.yaml deleted file mode 100644 index fdaff97..0000000 --- a/target/nextflow/umitools_prepareforquant/.config.vsh.yaml +++ /dev/null @@ -1,186 +0,0 @@ -name: "umitools_prepareforquant" -version: "main" -argument_groups: -- name: "Input" - arguments: - - type: "file" - name: "--bam" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" -- name: "Output" - arguments: - - type: "file" - name: "--output" - info: null - default: - - "$id.transcriptome_sorted.bam" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--log" - info: null - default: - - "$id.$key.log" - must_exist: true - create_parent: true - required: false - direction: "output" - multiple: false - multiple_sep: ";" -resources: -- type: "bash_script" - path: "script.sh" - is_executable: true -- type: "file" - path: "prepare-for-rsem.py" -description: "Fix paired-end reads in name sorted BAM file to prepare for salmon quantification" -info: - migration_info: - git_repo: "https://github.com/nf-core/rnaseq.git" - paths: - - "modules/local/umitools_prepareforrsem.nf" - last_sha: "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" -status: "enabled" -requirements: - commands: - - "ps" -repositories: -- type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" -- type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" -runners: -- type: "executable" - id: 
"executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - mem1gb: "memory = 1000000000.B" - mem2gb: "memory = 2000000000.B" - mem5gb: "memory = 5000000000.B" - mem10gb: "memory = 10000000000.B" - mem20gb: "memory = 20000000000.B" - mem50gb: "memory = 50000000000.B" - mem100gb: "memory = 100000000000.B" - mem200gb: "memory = 200000000000.B" - mem500gb: "memory = 500000000000.B" - mem1tb: "memory = 1000000000000.B" - mem2tb: "memory = 2000000000000.B" - mem5tb: "memory = 5000000000000.B" - mem10tb: "memory = 10000000000000.B" - mem20tb: "memory = 20000000000000.B" - mem50tb: "memory = 50000000000000.B" - mem100tb: "memory = 100000000000000.B" - mem200tb: "memory = 200000000000000.B" - mem500tb: "memory = 500000000000000.B" - mem1gib: "memory = 1073741824.B" - mem2gib: "memory = 2147483648.B" - mem4gib: "memory = 4294967296.B" - mem8gib: "memory = 8589934592.B" - mem16gib: "memory = 17179869184.B" - mem32gib: "memory = 34359738368.B" - mem64gib: "memory = 68719476736.B" - mem128gib: "memory = 137438953472.B" - mem256gib: "memory = 274877906944.B" - mem512gib: "memory = 549755813888.B" - mem1tib: "memory = 1099511627776.B" - mem2tib: "memory = 2199023255552.B" - mem4tib: "memory = 4398046511104.B" - mem8tib: "memory = 8796093022208.B" - mem16tib: "memory = 17592186044416.B" - mem32tib: "memory = 35184372088832.B" - mem64tib: "memory = 70368744177664.B" - mem128tib: "memory = 140737488355328.B" - mem256tib: "memory = 281474976710656.B" - mem512tib: "memory = 562949953421312.B" - cpu1: "cpus = 1" - cpu2: "cpus = 2" - cpu5: "cpus = 5" - cpu10: "cpus = 10" - cpu20: "cpus = 20" - cpu50: "cpus = 50" - cpu100: "cpus = 100" - cpu200: "cpus = 200" - cpu500: "cpus = 500" - cpu1000: "cpus = 1000" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: 
"ubuntu:22.04" - target_registry: "images.viash-hub.com" - target_tag: "main" - namespace_separator: "/" - setup: - - type: "apt" - packages: - - "pip" - interactive: false - - type: "python" - user: false - packages: - - "umi_tools" - - "pysam" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/umitools_prepareforquant/config.vsh.yaml" - runner: "nextflow" - engine: "docker|native" - output: "target/nextflow/umitools_prepareforquant" - executable: "target/nextflow/umitools_prepareforquant/main.nf" - viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" -package_config: - name: "rnaseq" - version: "main" - info: - test_resources: - - path: "gs://viash-hub-test-data/rnaseq/v1" - dest: "testData" - repositories: - - type: "vsh" - name: "biobox" - repo: "vsh/biobox" - tag: "main" - - type: "vsh" - name: "craftbox" - repo: "craftbox" - tag: "v0.1.0" - viash_version: "0.9.0" - source: "src" - target: "target" - config_mods: - - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\ - \ := '$id'\n" - - ".engines += { type: \"native\" }" - - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - - ".engines[.type == 'docker'].target_tag := 'main'" - organization: "vsh" diff --git a/target/nextflow/umitools_prepareforquant/main.nf b/target/nextflow/umitools_prepareforquant/main.nf deleted file mode 100644 index 14c1787..0000000 --- a/target/nextflow/umitools_prepareforquant/main.nf +++ /dev/null @@ -1,3574 +0,0 @@ -// umitools_prepareforquant main -// -// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative -// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -// Intuitive. -// -// The component may contain files which fall under a different license. 
The -// authors of this component should specify the license in the header of such -// files, or include a separate license file detailing the licenses of all included -// files. - -//////////////////////////// -// VDSL3 helper functions // -//////////////////////////// - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' -class UnexpectedArgumentTypeException extends Exception { - String errorIdentifier - String stage - String plainName - String expectedClass - String foundClass - - // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} - UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { - super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + - "Expected type: ${expectedClass}. Found type: ${foundClass}") - this.errorIdentifier = errorIdentifier - this.stage = stage - this.plainName = plainName - this.expectedClass = expectedClass - this.foundClass = foundClass - } -} - -/** - * Checks if the given value is of the expected type. If not, an exception is thrown. 
- * - * @param stage The stage of the argument (input or output) - * @param par The parameter definition - * @param value The value to check - * @param errorIdentifier The identifier to use in the error message - * @return The value, if it is of the expected type - * @throws UnexpectedArgumentTypeException If the value is not of the expected type -*/ -def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { - // expectedClass will only be != null if value is not of the expected type - def expectedClass = null - def foundClass = null - - // todo: split if need be - - if (!par.required && value == null) { - expectedClass = null - } else if (par.multiple) { - if (value !instanceof Collection) { - value = [value] - } - - // split strings - value = value.collectMany{ val -> - if (val instanceof String) { - // collect() to ensure that the result is a List and not simply an array - val.split(par.multiple_sep).collect() - } else { - [val] - } - } - - // process globs - if (par.type == "file" && par.direction == "input") { - value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() - } - - // check types of elements in list - try { - value = value.collect { listVal -> - _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) - } - } catch (UnexpectedArgumentTypeException e) { - expectedClass = "List[${e.expectedClass}]" - foundClass = "List[${e.foundClass}]" - } - } else if (par.type == "string") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? null : "String" - } else if (par.type == "integer") { - // cast to integer if need be - if (value instanceof String) { - try { - value = value.toInteger() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigInteger) { - value = value.intValue() - } - expectedClass = value instanceof Integer ? 
null : "Integer" - } else if (par.type == "long") { - // cast to long if need be - if (value instanceof String) { - try { - value = value.toLong() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof Integer) { - value = value.toLong() - } - expectedClass = value instanceof Long ? null : "Long" - } else if (par.type == "double") { - // cast to double if need be - if (value instanceof String) { - try { - value = value.toDouble() - } catch (NumberFormatException e) { - // do nothing - } - } - if (value instanceof java.math.BigDecimal) { - value = value.doubleValue() - } - if (value instanceof Float) { - value = value.toDouble() - } - expectedClass = value instanceof Double ? null : "Double" - } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { - // cast to boolean if need be - if (value instanceof String) { - def valueLower = value.toLowerCase() - if (valueLower == "true") { - value = true - } else if (valueLower == "false") { - value = false - } - } - expectedClass = value instanceof Boolean ? null : "Boolean" - } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { - // cast to path if need be - if (value instanceof String) { - value = file(value, hidden: true) - } - if (value instanceof File) { - value = value.toPath() - } - expectedClass = value instanceof Path ? null : "Path" - } else if (par.type == "file" && stage == "input" && par.direction == "output") { - // cast to string if need be - if (value instanceof GString) { - value = value.toString() - } - expectedClass = value instanceof String ? 
null : "String" - } else { - // didn't find a match for par.type - expectedClass = par.type - } - - if (expectedClass != null) { - if (foundClass == null) { - foundClass = value.getClass().getName() - } - throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) - } - - return value -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' -Map _processInputValues(Map inputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.required) { - assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" - } - } - - inputs = inputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" - - value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") - - [ name, value ] - } - } - return inputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' -Map _processOutputValues(Map outputs, Map config, String id, String key) { - if (!workflow.stubRun) { - config.allArguments.each { arg -> - if (arg.direction == "output" && arg.required) { - assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : - "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" - } - } - - outputs = outputs.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && it.direction == "output" } - assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" - - value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") - - [ name, value ] 
- } - } - return outputs -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' -class IDChecker { - final def items = [] as Set - - @groovy.transform.WithWriteLock - boolean observe(String item) { - if (items.contains(item)) { - return false - } else { - items << item - return true - } - } - - @groovy.transform.WithReadLock - boolean contains(String item) { - return items.contains(item) - } - - @groovy.transform.WithReadLock - Set getItems() { - return items.clone() - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' - -/** - * Check if the ids are unique across parameter sets - * - * @param parameterSets a list of parameter sets. - */ -private void _checkUniqueIds(List>> parameterSets) { - def ppIds = parameterSets.collect{it[0]} - assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' - -// helper functions for reading params from file // -def _getChild(parent, child) { - if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { - child - } else { - def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() - parentAbsolute.replaceAll('/[^/]*$', "/") + child - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' -/** - * Figure out the param list format based on the file extension - * - * @param param_list A String containing the path to the parameter list file. - * - * @return A String containing the format of the parameter list file. 
- */ -def _paramListGuessFormat(param_list) { - if (param_list !instanceof String) { - "asis" - } else if (param_list.endsWith(".csv")) { - "csv" - } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { - "json" - } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { - "yaml" - } else { - "yaml_blob" - } -} - - -/** - * Read the param list - * - * @param param_list One of the following: - * - A String containing the path to the parameter list file (csv, json or yaml), - * - A yaml blob of a list of maps (yaml_blob), - * - Or a groovy list of maps (asis). - * @param config A Map of the Viash configuration. - * - * @return A List of Maps containing the parameters. - */ -def _parseParamList(param_list, Map config) { - // first determine format by extension - def paramListFormat = _paramListGuessFormat(param_list) - - def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? - file(param_list, hidden: true) : - null - - // get the correct parser function for the detected params_list format - def paramSets = [] - if (paramListFormat == "asis") { - paramSets = param_list - } else if (paramListFormat == "yaml_blob") { - paramSets = readYamlBlob(param_list) - } else if (paramListFormat == "yaml") { - paramSets = readYaml(paramListPath) - } else if (paramListFormat == "json") { - paramSets = readJson(paramListPath) - } else if (paramListFormat == "csv") { - paramSets = readCsv(paramListPath) - } else { - error "Format of provided --param_list not recognised.\n" + - "Found: '$paramListFormat'.\n" + - "Expected: a csv file, a json file, a yaml file,\n" + - "a yaml blob or a groovy list of maps." 
- } - - // data checks - assert paramSets instanceof List: "--param_list should contain a list of maps" - for (value in paramSets) { - assert value instanceof Map: "--param_list should contain a list of maps" - } - - // id is argument - def idIsArgument = config.allArguments.any{it.plainName == "id"} - - // Reformat from List to List> by adding the ID as first element of a Tuple2 - paramSets = paramSets.collect({ data -> - def id = data.id - if (!idIsArgument) { - data = data.findAll{k, v -> k != "id"} - } - [id, data] - }) - - // Split parameters with 'multiple: true' - paramSets = paramSets.collect({ id, data -> - data = _splitParams(data, config) - [id, data] - }) - - // The paths of input files inside a param_list file may have been specified relatively to the - // location of the param_list file. These paths must be made absolute. - if (paramListPath) { - paramSets = paramSets.collect({ id, data -> - def new_data = data.collectEntries{ parName, parValue -> - def par = config.allArguments.find{it.plainName == parName} - if (par && par.type == "file" && par.direction == "input") { - if (parValue instanceof Collection) { - parValue = parValue.collectMany{path -> - def x = _resolveSiblingIfNotAbsolute(path, paramListPath) - x instanceof Collection ? x : [x] - } - } else { - parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) - } - } - [parName, parValue] - } - [id, new_data] - }) - } - - return paramSets -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' -/** - * Split parameters for arguments that accept multiple values using their separator - * - * @param paramList A Map containing parameters to split. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A Map of parameters where the parameter values have been split into a list using - * their seperator. 
- */ -Map _splitParams(Map parValues, Map config){ - def parsedParamValues = parValues.collectEntries { parName, parValue -> - def parameterSettings = config.allArguments.find({it.plainName == parName}) - - if (!parameterSettings) { - // if argument is not found, do not alter - return [parName, parValue] - } - if (parameterSettings.multiple) { // Check if parameter can accept multiple values - if (parValue instanceof Collection) { - parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } - } else if (parValue instanceof String) { - parValue = parValue.split(parameterSettings.multiple_sep) - } else if (parValue == null) { - parValue = [] - } else { - parValue = [ parValue ] - } - parValue = parValue.flatten() - } - // For all parameters check if multiple values are only passed for - // arguments that allow it. Quietly simplify lists of length 1. - if (!parameterSettings.multiple && parValue instanceof Collection) { - assert parValue.size() == 1 : - "Error: argument ${parName} has too many values.\n" + - " Expected amount: 1. Found: ${parValue.size()}" - parValue = parValue[0] - } - [parName, parValue] - } - return parsedParamValues -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' -/** - * Parse nextflow parameters based on settings defined in a viash config. - * Return a list of parameter sets, each parameter set corresponding to - * an event in a nextflow channel. The output from this function can be used - * with Channel.fromList to create a nextflow channel with Vdsl3 formatted - * events. - * - * This function performs: - * - A filtering of the params which can be found in the config file. - * - Process the params_list argument which allows a user to to initialise - * a Vsdl3 channel with multiple parameter sets. Possible formats are - * csv, json, yaml, or simply a yaml_blob. A csv should have column names - * which correspond to the different arguments of this pipeline. 
A json or a yaml - * file should be a list of maps, each of which has keys corresponding to the - * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. - * When passing a csv, json or yaml, relative path names are relativized to the - * location of the parameter file. - * - Combine the parameter sets into a vdsl3 Channel. - * - * @param params Input parameters. Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A list of parameters with the first element of the event being - * the event ID and the second element containing a map of the parsed parameters. - */ - -private List>> _paramsToParamSets(Map params, Map config){ - // todo: fetch key from run args - def key_ = config.name - - /* parse regular parameters (not in param_list) */ - /*************************************************/ - def globalParams = config.allArguments - .findAll { params.containsKey(it.plainName) } - .collectEntries { [ it.plainName, params[it.plainName] ] } - def globalID = params.get("id", null) - - /* process params_list arguments */ - /*********************************/ - def paramList = params.containsKey("param_list") && params.param_list != null ? 
- params.param_list : [] - // if (paramList instanceof String) { - // paramList = [paramList] - // } - // def paramSets = paramList.collectMany{ _parseParamList(it, config) } - // TODO: be able to process param_list when it is a list of strings - def paramSets = _parseParamList(paramList, config) - if (paramSets.isEmpty()) { - paramSets = [[null, [:]]] - } - - /* combine arguments into channel */ - /**********************************/ - def processedParams = paramSets.indexed().collect{ index, tup -> - // Process ID - def id = tup[0] ?: globalID - - if (workflow.stubRun && !id) { - // if stub run, explicitly add an id if missing - id = "stub${index}" - } - assert id != null: "Each parameter set should have at least an 'id'" - - // Process params - def parValues = globalParams + tup[1] - // // Remove parameters which are null, if the default is also null - // parValues = parValues.collectEntries{paramName, paramValue -> - // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) - // if ( paramValue != null || parameterSettings.get("default", null) != null ) { - // [paramName, paramValue] - // } - // } - parValues = parValues.collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" - - if (par == null) { - return [:] - } - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - - [ name, value ] - } - - [id, parValues] - } - - // Check if ids (first element of each list) is unique - _checkUniqueIds(processedParams) - return processedParams -} - -/** - * Parse nextflow parameters based on settings defined in a viash config - * and return a nextflow channel. - * - * @param params Input parameters. 
Can optionaly contain a 'param_list' key that - * provides a list of arguments that can be split up into multiple events - * in the output channel possible formats of param_lists are: a csv file, - * json file, a yaml file or a yaml blob. Each parameters set (event) must - * have a unique ID. - * @param config A Map of the Viash configuration. This Map can be generated from the config file - * using the readConfig() function. - * - * @return A nextflow Channel with events. Events are formatted as a tuple that contains - * first contains the ID of the event and as second element holds a parameter map. - * - * - */ -def channelFromParams(Map params, Map config) { - def processedParams = _paramsToParamSets(params, config) - return Channel.fromList(processedParams) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' -def checkUniqueIds(Map args) { - def stopOnError = args.stopOnError == null ? args.stopOnError : true - - def idChecker = new IDChecker() - - return filter { tup -> - if (!idChecker.observe(tup[0])) { - if (stopOnError) { - error "Duplicate id: ${tup[0]}" - } else { - log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" - return false - } - } - return true - } -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' -// This helper file will be deprecated soon -preprocessInputsDeprecationWarningPrinted = false - -def preprocessInputsDeprecationWarning() { - if (!preprocessInputsDeprecationWarningPrinted) { - preprocessInputsDeprecationWarningPrinted = true - System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") - } -} - -/** - * Generate a nextflow Workflow that allows processing a channel of - * Vdsl3 formatted events and apply a Viash config to them: - * - Gather default parameters from the Viash config and make - * sure that they are correctly formatted (see applyConfig method). 
- * - Format the input parameters (also using the applyConfig method). - * - Apply the default parameter to the input parameters. - * - Do some assertions: - * ~ Check if the event IDs in the channel are unique. - * - * The events in the channel are formatted as tuples, with the - * first element of the tuples being a unique id of the parameter set, - * and the second element containg the the parameters themselves. - * Optional extra elements of the tuples will be passed to the output as is. - * - * @param args A map that must contain a 'config' key that points - * to a parsed config (see readConfig()). Optionally, a - * 'key' key can be provided which can be used to create a unique - * name for the workflow process. - * - * @return A workflow that allows processing a channel of Vdsl3 formatted events - * and apply a Viash config to them. - */ -def preprocessInputs(Map args) { - preprocessInputsDeprecationWarning() - - def config = args.config - assert config instanceof Map : - "Error in preprocessInputs: config must be a map. " + - "Expected class: Map. Found: config.getClass() is ${config.getClass()}" - def key_ = args.key ?: config.name - - // Get different parameter types (used throughout this function) - def defaultArgs = config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - map { tup -> - def id = tup[0] - def data = tup[1] - def passthrough = tup.drop(2) - - def new_data = (defaultArgs + data).collectEntries { name, value -> - def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } - - if (par != null) { - value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") - } - - [ name, value ] - } - - [ id, new_data ] + passthrough - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' -/** - * Run a list of components on a stream of data. 
- * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component config. - * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component config. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component config. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component config. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runComponents(Map args) { - log.warn("runComponents is deprecated, use runEach instead") - assert args.components: "runComponents should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runComponents" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def id_ = args.id - - workflow runComponentsWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def comp_config = comp_.config - - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_config) - } - : input_ch - def id_ch = id_ - ? 
filter_ch | map{tup -> - // def new_id = id_(tup[0], tup[1], comp_config) - def new_id = tup[0] - if (id_ instanceof String) { - new_id = id_ - } else if (id_ instanceof Closure) { - new_id = id_(new_id, tup[1], comp_config) - } - [new_id] + tup.drop(1) - } - : filter_ch - def data_ch = id_ch | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_config) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_config) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - post_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runComponentsWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf' -/** - * Run a list of components on a stream of data. - * - * @param components: list of Viash VDSL3 modules to run - * @param fromState: a closure, a map or a list of keys to extract from the input data. - * If a closure, it will be called with the id, the data and the component itself. 
- * @param toState: a closure, a map or a list of keys to extract from the output data - * If a closure, it will be called with the id, the output data, the old state and the component itself. - * @param filter: filter function to apply to the input. - * It will be called with the id, the data and the component itself. - * @param id: id to use for the output data - * If a closure, it will be called with the id, the data and the component itself. - * @param auto: auto options to pass to the components - * - * @return: a workflow that runs the components - **/ -def runEach(Map args) { - assert args.components: "runEach should be passed a list of components to run" - - def components_ = args.components - if (components_ !instanceof List) { - components_ = [ components_ ] - } - assert components_.size() > 0: "pass at least one component to runEach" - - def fromState_ = args.fromState - def toState_ = args.toState - def filter_ = args.filter - def runIf_ = args.runIf - def id_ = args.id - - assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf." - - workflow runEachWf { - take: input_ch - main: - - // generate one channel per method - out_chs = components_.collect{ comp_ -> - def filter_ch = filter_ - ? input_ch | filter{tup -> - filter_(tup[0], tup[1], comp_) - } - : input_ch - def id_ch = id_ - ? filter_ch | map{tup -> - def new_id = id_ - if (new_id instanceof Closure) { - new_id = new_id(tup[0], tup[1], comp_) - } - assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. 
Found: ${new_id.getClass()}" - [new_id] + tup.drop(1) - } - : filter_ch - def chPassthrough = null - def chRun = null - if (runIf_) { - def idRunIfBranch = id_ch.branch{ tup -> - run: runIf_(tup[0], tup[1], comp_) - passthrough: true - } - chPassthrough = idRunIfBranch.passthrough - chRun = idRunIfBranch.run - } else { - chRun = id_ch - chPassthrough = Channel.empty() - } - def data_ch = chRun | map{tup -> - def new_data = tup[1] - if (fromState_ instanceof Map) { - new_data = fromState_.collectEntries{ key0, key1 -> - [key0, new_data[key1]] - } - } else if (fromState_ instanceof List) { - new_data = fromState_.collectEntries{ key -> - [key, new_data[key]] - } - } else if (fromState_ instanceof Closure) { - new_data = fromState_(tup[0], new_data, comp_) - } - tup.take(1) + [new_data] + tup.drop(1) - } - def out_ch = data_ch - | comp_.run( - auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false] - ) - def post_ch = toState_ - ? out_ch | map{tup -> - def output = tup[1] - def old_state = tup[2] - def new_state = null - if (toState_ instanceof Map) { - new_state = old_state + toState_.collectEntries{ key0, key1 -> - [key0, output[key1]] - } - } else if (toState_ instanceof List) { - new_state = old_state + toState_.collectEntries{ key -> - [key, output[key]] - } - } else if (toState_ instanceof Closure) { - new_state = toState_(tup[0], output, old_state, comp_) - } - [tup[0], new_state] + tup.drop(3) - } - : out_ch - - def return_ch = post_ch - | concat(chPassthrough) - - return_ch - } - - // mix all results - output_ch = - (out_chs.size == 1) - ? out_chs[0] - : out_chs[0].mix(*out_chs.drop(1)) - - emit: output_ch - } - - return runEachWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf' -/** - * Join sourceChannel to targetChannel - * - * This function joins the sourceChannel to the targetChannel. - * However, each id in the targetChannel must be present in the - * sourceChannel. 
If _meta.join_id exists in the targetChannel, that is - * used as an id instead. If the id doesn't match any id in the sourceChannel, - * an error is thrown. - */ - -def safeJoin(targetChannel, sourceChannel, key) { - def sourceIDs = new IDChecker() - - def sourceCheck = sourceChannel - | map { tup -> - sourceIDs.observe(tup[0]) - tup - } - def targetCheck = targetChannel - | map { tup -> - def id = tup[0] - - if (!sourceIDs.contains(id)) { - error ( - "Error in module '${key}' when merging output with original state.\n" + - " Reason: output with id '${id}' could not be joined with source channel.\n" + - " If the IDs in the output channel differ from the input channel,\n" + - " please set `tup[1]._meta.join_id to the original ID.\n" + - " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" + - " Unexpected ID in the output channel: '${id}'.\n" + - " Example input event: [\"id\", [input: file(...)]],\n" + - " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]" - ) - } - // TODO: add link to our documentation on how to fix this - - tup - } - - sourceCheck.cross(targetChannel) - | map{ left, right -> - right + left.drop(1) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf' -def _processArgument(arg) { - arg.multiple = arg.multiple != null ? arg.multiple : false - arg.required = arg.required != null ? arg.required : false - arg.direction = arg.direction != null ? arg.direction : "input" - arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";" - arg.plainName = arg.name.replaceAll("^-*", "") - - if (arg.type == "file") { - arg.must_exist = arg.must_exist != null ? arg.must_exist : true - arg.create_parent = arg.create_parent != null ? arg.create_parent : true - } - - // add default values to output files which haven't already got a default - if (arg.type == "file" && arg.direction == "output" && arg.default == null) { - def mult = arg.multiple ? 
"_*" : "" - def extSearch = "" - if (arg.default != null) { - extSearch = arg.default - } else if (arg.example != null) { - extSearch = arg.example - } - if (extSearch instanceof List) { - extSearch = extSearch[0] - } - def extSearchResult = extSearch.find("\\.[^\\.]+\$") - def ext = extSearchResult != null ? extSearchResult : "" - arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}" - if (arg.multiple) { - arg.default = [arg.default] - } - } - - if (!arg.multiple) { - if (arg.default != null && arg.default instanceof List) { - arg.default = arg.default[0] - } - if (arg.example != null && arg.example instanceof List) { - arg.example = arg.example[0] - } - } - - if (arg.type == "boolean_true") { - arg.default = false - } - if (arg.type == "boolean_false") { - arg.default = true - } - - arg -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf' -def addGlobalArguments(config) { - def localConfig = [ - "argument_groups": [ - [ - "name": "Nextflow input-output arguments", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "arguments" : [ - [ - 'name': '--publish_dir', - 'required': true, - 'type': 'string', - 'description': 'Path to an output directory.', - 'example': 'output/', - 'multiple': false - ], - [ - 'name': '--param_list', - 'required': false, - 'type': 'string', - 'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob. - | - |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`. - |* A csv file should have column names which correspond to the different arguments of this pipeline. 
Example: `--param_list data.csv` with columns `id,input`. - |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`. - |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`. - | - |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(), - 'example': 'my_params.yaml', - 'multiple': false, - 'hidden': true - ] - // TODO: allow multiple: true in param_list? - // TODO: allow to specify a --param_list_regex to filter the param_list? - // TODO: allow to specify a --param_list_from_state to remap entries in the param_list? - ] - ] - ] - ] - - return processConfig(_mergeMap(config, localConfig)) -} - -def _mergeMap(Map lhs, Map rhs) { - return rhs.inject(lhs.clone()) { map, entry -> - if (map[entry.key] instanceof Map && entry.value instanceof Map) { - map[entry.key] = _mergeMap(map[entry.key], entry.value) - } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) { - map[entry.key] += entry.value - } else { - map[entry.key] = entry.value - } - return map - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf' -def _generateArgumentHelp(param) { - // alternatives are not supported - // def names = param.alternatives ::: List(param.name) - - def unnamedProps = [ - ["required parameter", param.required], - ["multiple values allowed", param.multiple], - ["output", param.direction.toLowerCase() == "output"], - ["file must exist", param.type == "file" && param.must_exist] - ].findAll{it[1]}.collect{it[0]} - - def dflt = null - if (param.default != null) { - if 
(param.default instanceof List) { - dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - dflt = param.default.toString() - } - } - def example = null - if (param.example != null) { - if (param.example instanceof List) { - example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ") - } else { - example = param.example.toString() - } - } - def min = param.min?.toString() - def max = param.max?.toString() - - def escapeChoice = { choice -> - def s1 = choice.replaceAll("\\n", "\\\\n") - def s2 = s1.replaceAll("\"", """\\\"""") - s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2 - } - def choices = param.choices == null ? - null : - "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]" - - def namedPropsStr = [ - ["type", ([param.type] + unnamedProps).join(", ")], - ["default", dflt], - ["example", example], - ["choices", choices], - ["min", min], - ["max", max] - ] - .findAll{it[1]} - .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")} - .join("") - - def descStr = param.description == null ? - "" : - _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ") - - "\n --" + param.plainName + - namedPropsStr + - descStr -} - -// Based on Helper.generateHelp() in Helper.scala -def _generateHelp(config) { - def fun = config - - // PART 1: NAME AND VERSION - def nameStr = fun.name + - (fun.version == null ? "" : " " + fun.version) - - // PART 2: DESCRIPTION - def descrStr = fun.description == null ? - "" : - "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n") - - // PART 3: Usage - def usageStr = fun.usage == null ? - "" : - "\n\nUsage:\n" + fun.usage.trim() - - // PART 4: Options - def argGroupStrs = fun.allArgumentGroups.collect{argGroup -> - def name = argGroup.name - def descriptionStr = argGroup.description == null ? 
- "" : - "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n" - def arguments = argGroup.arguments.collect{arg -> - arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg - }.findAll{it != null} - def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)} - - "\n\n$name:" + - descriptionStr + - argumentStrs.join("\n") - } - - // FINAL: combine - def out = nameStr + - descrStr + - usageStr + - argGroupStrs.join("") - - return out -} - -// based on Format._paragraphWrap -def _paragraphWrap(str, maxLength) { - def outLines = [] - str.split("\n").each{par -> - def words = par.split("\\s").toList() - - def word = null - def line = words.pop() - while(!words.isEmpty()) { - word = words.pop() - if (line.length() + word.length() + 1 <= maxLength) { - line = line + " " + word - } else { - outLines.add(line) - line = word - } - } - if (words.isEmpty()) { - outLines.add(line) - } - } - return outLines -} - -def helpMessage(config) { - if (params.containsKey("help") && params.help) { - def mergedConfig = addGlobalArguments(config) - def helpStr = _generateHelp(mergedConfig) - println(helpStr) - exit 0 - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf' -def processConfig(config) { - // set defaults for arguments - config.arguments = - (config.arguments ?: []).collect{_processArgument(it)} - - // set defaults for argument_group arguments - config.argument_groups = - (config.argument_groups ?: []).collect{grp -> - grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)} - grp - } - - // create combined arguments list - config.allArguments = - config.arguments + - config.argument_groups.collectMany{it.arguments} - - // add missing argument groups (based on Functionality::allArgumentGroups()) - def argGroups = config.argument_groups - if (argGroups.any{it.name.toLowerCase() == "arguments"}) { - argGroups = argGroups.collect{ grp -> - if (grp.name.toLowerCase() == 
"arguments") { - grp = grp + [ - arguments: grp.arguments + config.arguments - ] - } - grp - } - } else { - argGroups = argGroups + [ - name: "Arguments", - arguments: config.arguments - ] - } - config.allArgumentGroups = argGroups - - config -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' - -def readConfig(file) { - def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) - processConfig(config) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' -/** - * Resolve a path relative to the current file. - * - * @param str The path to resolve, as a String. - * @param parentPath The path to resolve relative to, as a Path. - * - * @return The path that may have been resovled, as a Path. - */ -def _resolveSiblingIfNotAbsolute(str, parentPath) { - if (str !instanceof String) { - return str - } - if (!_stringIsAbsolutePath(str)) { - return parentPath.resolveSibling(str) - } else { - return file(str, hidden: true) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' -/** - * Check whether a path as a string is absolute. - * - * In the past, we tried using `file(., relative: true).isAbsolute()`, - * but the 'relative' option was added in 22.10.0. - * - * @param path The path to check, as a String. - * - * @return Whether the path is absolute, as a boolean. 
- */ -def _stringIsAbsolutePath(path) { - def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ - - assert path instanceof String - return _resolve_URL_PROTOCOL.matcher(path).matches() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' -class CustomTraceObserver implements nextflow.trace.TraceObserver { - List traces - - CustomTraceObserver(List traces) { - this.traces = traces - } - - @Override - void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } - - @Override - void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { - def trace2 = trace.store.clone() - trace2.script = null - traces.add(trace2) - } -} - -def collectTraces() { - def traces = Collections.synchronizedList([]) - - // add custom trace observer which stores traces in the traces object - session.observers.add(new CustomTraceObserver(traces)) - - traces -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' -/** - * Performs a deep clone of the given object. - * @param x an object - */ -def deepClone(x) { - iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' -def getPublishDir() { - return params.containsKey("publish_dir") ? params.publish_dir : - params.containsKey("publishDir") ? 
params.publishDir : - null -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' - -// Recurse upwards until we find a '.build.yaml' file -def _findBuildYamlFile(pathPossiblySymlink) { - def path = pathPossiblySymlink.toRealPath() - def child = path.resolve(".build.yaml") - if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { - return child - } else { - def parent = path.getParent() - if (parent == null) { - return null - } else { - return _findBuildYamlFile(parent) - } - } -} - -// get the root of the target folder -def getRootDir() { - def dir = _findBuildYamlFile(meta.resources_dir) - assert dir != null: "Could not find .build.yaml in the folder structure" - dir.getParent() -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' -/** - * Recursively apply a function over the leaves of an object. - * @param obj The object to iterate over. - * @param fun The function to apply to each value. - * @return The object with the function applied to each value. - */ -def iterateMap(obj, fun) { - if (obj instanceof List && obj !instanceof String) { - return obj.collect{item -> - iterateMap(item, fun) - } - } else if (obj instanceof Map) { - return obj.collectEntries{key, item -> - [key.toString(), iterateMap(item, fun)] - } - } else { - return fun(obj) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' -/** - * A view for printing the event of each channel as a YAML blob. - * This is useful for debugging. - */ -def niceView() { - workflow niceViewWf { - take: input - main: - output = input - | view{toYamlBlob(it)} - emit: output - } - return niceViewWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' - -def readCsv(file_path) { - def output = [] - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - - // todo: allow escaped quotes in string - // todo: allow single quotes? - def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') - def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') - - def br = java.nio.file.Files.newBufferedReader(inputFile) - - def row = -1 - def header = null - while (br.ready() && header == null) { - def line = br.readLine() - row++ - if (!line.startsWith("#")) { - header = splitRegex.split(line, -1).collect{field -> - m = removeQuote.matcher(field) - m.find() ? m.replaceFirst('$1') : field - } - } - } - assert header != null: "CSV file should contain a header" - - while (br.ready()) { - def line = br.readLine() - row++ - if (line == null) { - br.close() - break - } - - if (!line.startsWith("#")) { - def predata = splitRegex.split(line, -1) - def data = predata.collect{field -> - if (field == "") { - return null - } - def m = removeQuote.matcher(field) - if (m.find()) { - return m.replaceFirst('$1') - } else { - return field - } - } - assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" - - def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} - output.add(dataMap) - } - } - - output -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' -def readJson(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parse(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' -def readJsonBlob(str) { - def jsonSlurper = new groovy.json.JsonSlurper() - jsonSlurper.parseText(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' -// Custom constructor to modify how certain objects are parsed from YAML -class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { - Path root - - class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { - public Object construct(org.yaml.snakeyaml.nodes.Node node) { - String filename = (String) constructScalar(node); - if (root != null) { - return root.resolve(filename); - } - return java.nio.file.Paths.get(filename); - } - } - - CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { - super(options) - this.root = root - // Handling !file tag and parse it back to a File type - this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) - } -} - -def readTaggedYaml(Path path) { - def options = new org.yaml.snakeyaml.LoaderOptions() - def constructor = new CustomConstructor(options, path.getParent()) - def yaml = new org.yaml.snakeyaml.Yaml(constructor) - return yaml.load(path.text) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' -def readYaml(file_path) { - def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(inputFile) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' -def readYamlBlob(str) { - def yamlSlurper = new org.yaml.snakeyaml.Yaml() - yamlSlurper.load(str) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' -String toJsonBlob(data) { - return groovy.json.JsonOutput.toJson(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' -// Custom representer to modify how certain objects are represented in YAML -class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { - Path relativizer - - class RepresentPath implements org.yaml.snakeyaml.representer.Represent { - public String getFileName(Object obj) { - if (obj instanceof File) { - obj = ((File) obj).toPath(); - } - if (obj !instanceof Path) { - throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); - } - def path = (Path) obj; - - if (relativizer != null) { - return relativizer.relativize(path).toString() - } else { - return path.toString() - } - } - - public org.yaml.snakeyaml.nodes.Node representData(Object data) { - String filename = getFileName(data); - def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); - return representScalar(tag, filename); - } - } - CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { - super(options) - this.relativizer = relativizer - this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) - this.representers.put(Path, new RepresentPath()) - this.representers.put(File, new RepresentPath()) - } -} - -String toTaggedYamlBlob(data) { - return toRelativeTaggedYamlBlob(data, null) -} -String toRelativeTaggedYamlBlob(data, Path relativizer) { - def options = new org.yaml.snakeyaml.DumperOptions() - 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - def representer = new CustomRepresenter(options, relativizer) - def yaml = new org.yaml.snakeyaml.Yaml(representer, options) - return yaml.dump(data) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' -String toYamlBlob(data) { - def options = new org.yaml.snakeyaml.DumperOptions() - options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) - options.setPrettyFlow(true) - def yaml = new org.yaml.snakeyaml.Yaml(options) - def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) - return yaml.dump(cleanData) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' -void writeJson(data, file) { - assert data: "writeJson: data should not be null" - assert file: "writeJson: file should not be null" - file.write(toJsonBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' -void writeYaml(data, file) { - assert data: "writeYaml: data should not be null" - assert file: "writeYaml: file should not be null" - file.write(toYamlBlob(data)) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' -def findStates(Map params, Map config) { - def auto_config = deepClone(config) - def auto_params = deepClone(params) - - auto_config = auto_config.clone() - // override arguments - auto_config.argument_groups = [] - auto_config.arguments = [ - [ - type: "string", - name: "--id", - description: "A dummy identifier", - required: false - ], - [ - type: "file", - name: "--input_states", - example: "/path/to/input/directory/**/state.yaml", - description: "Path to input directory containing the datasets to be integrated.", - required: true, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--filter", - example: "foo/.*/state.yaml", - description: "Regex to filter state files by path.", - 
required: false - ], - // to do: make this a yaml blob? - [ - type: "string", - name: "--rename_keys", - example: ["newKey1:oldKey1", "newKey2:oldKey2"], - description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", - required: false, - multiple: true, - multiple_sep: ";" - ], - [ - type: "string", - name: "--settings", - example: '{"output_dataset": "dataset.h5ad", "k": 10}', - description: "Global arguments as a JSON glob to be passed to all components.", - required: false - ] - ] - if (!(auto_params.containsKey("id"))) { - auto_params["id"] = "auto" - } - - // run auto config through processConfig once more - auto_config = processConfig(auto_config) - - workflow findStatesWf { - helpMessage(auto_config) - - output_ch = - channelFromParams(auto_params, auto_config) - | flatMap { autoId, args -> - - def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] - - // look for state files in input dir - def stateFiles = args.input_states - - // filter state files by regex - if (args.filter) { - stateFiles = stateFiles.findAll{ stateFile -> - def stateFileStr = stateFile.toString() - def matcher = stateFileStr =~ args.filter - matcher.matches()} - } - - // read in states - def states = stateFiles.collect { stateFile -> - def state_ = readTaggedYaml(stateFile) - [state_.id, state_] - } - - // construct renameMap - if (args.rename_keys) { - def renameMap = args.rename_keys.collectEntries{renameString -> - def split = renameString.split(":") - assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" - split - } - - // rename keys in state, only let states through which have all keys - // also add global settings - states = states.collectMany{id, state -> - def newState = [:] - - for (key in renameMap.keySet()) { - def origKey = renameMap[key] - if (!(state.containsKey(origKey))) { - 
return [] - } - newState[key] = state[origKey] - } - - [[id, globalSettings + newState]] - } - } - - states - } - emit: - output_ch - } - - return findStatesWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' -def joinStates(Closure apply_) { - workflow joinStatesWf { - take: input_ch - main: - output_ch = input_ch - | toSortedList - | filter{ it.size() > 0 } - | map{ tups -> - def ids = tups.collect{it[0]} - def states = tups.collect{it[1]} - apply_(ids, states) - } - - emit: output_ch - } - return joinStatesWf -} -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' -def collectFiles(obj) { - if (obj instanceof java.io.File || obj instanceof Path) { - return [obj] - } else if (obj instanceof List && obj !instanceof String) { - return obj.collectMany{item -> - collectFiles(item) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectFiles(item) - } - } else { - return [] - } -} - -/** - * Recurse through a state and collect all input files and their target output filenames. - * @param obj The state to recurse through. - * @param prefix The prefix to prepend to the output filenames. - */ -def collectInputOutputPaths(obj, prefix) { - if (obj instanceof File || obj instanceof Path) { - def path = obj instanceof Path ? obj : obj.toPath() - def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" - def newFilename = prefix + ext - return [[obj, newFilename]] - } else if (obj instanceof List && obj !instanceof String) { - return obj.withIndex().collectMany{item, ix -> - collectInputOutputPaths(item, prefix + "_" + ix) - } - } else if (obj instanceof Map) { - return obj.collectMany{key, item -> - collectInputOutputPaths(item, prefix + "." 
+ key) - } - } else { - return [] - } -} - -def publishStates(Map args) { - def key_ = args.get("key") - def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) - - assert key_ != null : "publishStates: key must be specified" - - workflow publishStatesWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] - - // the input files and the target output filenames - def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() - def inputFiles_ = inputoutputFilenames_[0] - def outputFilenames_ = inputoutputFilenames_[1] - - def yamlFilename = yamlTemplate_ - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - - // TODO: do the pathnames in state_ match up with the outputFilenames_? - - // convert state to yaml blob - def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) - - [id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_] - } - | publishStatesProc - emit: input_ch - } - return publishStatesWf -} -process publishStatesProc { - // todo: check publishpath? - publishDir path: "${getPublishDir()}/", mode: "copy" - tag "$id" - input: - tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) - output: - tuple val(id), path{[yamlFile] + outputFiles} - script: - def copyCommands = [ - inputFiles instanceof List ? inputFiles : [inputFiles], - outputFiles instanceof List ? 
outputFiles : [outputFiles] - ] - .transpose() - .collectMany{infile, outfile -> - if (infile.toString() != outfile.toString()) { - [ - "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", - "cp -r '${infile.toString()}' '${outfile.toString()}'" - ] - } else { - // no need to copy if infile is the same as outfile - [] - } - } - """ -mkdir -p "\$(dirname '${yamlFile}')" -echo "Storing state as yaml" -echo '${yamlBlob}' > '${yamlFile}' -echo "Copying output files to destination folder" -${copyCommands.join("\n ")} -""" -} - - -// this assumes that the state contains no other values other than those specified in the config -def publishStatesByConfig(Map args) { - def config = args.get("config") - assert config != null : "publishStatesByConfig: config must be specified" - - def key_ = args.get("key", config.name) - assert key_ != null : "publishStatesByConfig: key must be specified" - - workflow publishStatesSimpleWf { - take: input_ch - main: - input_ch - | map { tup -> - def id_ = tup[0] - def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] - def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] - - // TODO: allow overriding the state.yaml template - // TODO TODO: if auto.publish == "state", add output_state as an argument - def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' - def yamlFilename = yamlTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - .replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() - - // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where - // - key is a String - // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) - // - inputPath is a List[Path] - // - outputFilename is a List[String] - // - (key, value) are the tuples that will be saved to the state.yaml file - // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) - def processedState = - config.allArguments - .findAll { it.direction == "output" } - .collectMany { par -> - def plainName_ = par.plainName - // if the state does not contain the key, it's an - // optional argument for which the component did - // not generate any output - if (!state_.containsKey(plainName_)) { - return [] - } - def value = state_[plainName_] - // if the parameter is not a file, it should be stored - // in the state as-is, but is not something that needs - // to be copied from the source path to the dest path - if (par.type != "file") { - return [[key: plainName_, value: value, inputPath: [], outputFilename: []]] - } - // if the orig state does not contain this filename, - // it's an optional argument for which the user specified - // that it should not be returned as a state - if (!origState_.containsKey(plainName_)) { - return [] - } - def filenameTemplate = origState_[plainName_] - // if the pararameter is multiple: true, fetch the template - if (par.multiple && filenameTemplate instanceof List) { - filenameTemplate = filenameTemplate[0] - } - // instantiate the template - def filename = filenameTemplate - .replaceAll('\\$id', id_) - .replaceAll('\\$\\{id\\}', id_) - 
.replaceAll('\\$key', key_) - .replaceAll('\\$\\{key\\}', key_) - if (par.multiple) { - // if the parameter is multiple: true, the filename - // should contain a wildcard '*' that is replaced with - // the index of the file - assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" - def outputPerFile = value.withIndex().collect{ val, ix -> - def filename_ix = filename.replace("*", ix.toString()) - def value_ = java.nio.file.Paths.get(filename_ix) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = val instanceof File ? val.toPath() : val - [value: value_, inputPath: inputPath, outputFilename: filename_ix] - } - def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key -> - [key, outputPerFile.collect{dic -> dic[key]}] - } - return [[key: plainName_] + transposedOutputs] - } else { - def value_ = java.nio.file.Paths.get(filename) - // if id contains a slash - if (yamlDir != null) { - value_ = yamlDir.relativize(value_) - } - def inputPath = value instanceof File ? value.toPath() : value - return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]] - } - } - - def updatedState_ = processedState.collectEntries{[it.key, it.value]} - def inputPaths = processedState.collectMany{it.inputPath} - def outputFilenames = processedState.collectMany{it.outputFilename} - - // convert state to yaml blob - def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) - - [id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames] - } - | publishStatesProc - emit: input_ch - } - return publishStatesSimpleWf -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' -def setState(fun) { - assert fun instanceof Closure || fun instanceof Map || fun instanceof List : - "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" - - // if fun is a List, convert to map - if (fun instanceof List) { - // check whether fun is a list[string] - assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" - fun = fun.collectEntries{[it, it]} - } - - // if fun is a map, convert to closure - if (fun instanceof Map) { - // check whether fun is a map[string, string] - assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" - assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" - def funMap = fun.clone() - // turn the map into a closure to be used later on - fun = { id_, state_ -> - assert state_ instanceof Map : "Error in setState: the state is not a Map" - funMap.collectMany{newkey, origkey -> - if (state_.containsKey(origkey)) { - [[newkey, state_[origkey]]] - } else { - [] - } - }.collectEntries() - } - } - - map { tup -> - def id = tup[0] - def state = tup[1] - def unfilteredState = fun(id, state) - def newState = unfilteredState.findAll{key, val -> val != null} - [id, newState] + tup.drop(2) - } -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' -// TODO: unit test processAuto -def processAuto(Map auto) { - // remove null values - auto = auto.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] - def unexpectedKeys = auto.keySet() - expectedKeys - assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" - - // check auto.simplifyInput - assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" - - // check auto.simplifyOutput - assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" - - // check auto.transcript - assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" - - // check auto.publish - assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" - - return auto.subMap(expectedKeys) -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' -def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { - assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" - map.forEach { key, val -> - assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" - } - requiredKeys.forEach { requiredKey -> - assert map.containsKey(requiredKey) : "Missing required key '$key' in ${mapName ? mapName + " " : ""}map" - } -} - -// TODO: unit test processDirectives -def processDirectives(Map drctv) { - // remove null values - drctv = drctv.findAll{k, v -> v != null} - - // check for unexpected keys - def expectedKeys = [ - "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" - ] - def unexpectedKeys = drctv.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" - - /* DIRECTIVE accelerator - accepted examples: - - [ limit: 4, type: "nvidia-tesla-k80" ] - */ - if (drctv.containsKey("accelerator")) { - assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") - } - - /* DIRECTIVE afterScript - accepted examples: - - "source /cluster/bin/cleanup" - */ - if (drctv.containsKey("afterScript")) { - assert drctv["afterScript"] instanceof CharSequence - } - - /* DIRECTIVE beforeScript - accepted examples: - - "source 
/cluster/bin/setup" - */ - if (drctv.containsKey("beforeScript")) { - assert drctv["beforeScript"] instanceof CharSequence - } - - /* DIRECTIVE cache - accepted examples: - - true - - false - - "deep" - - "lenient" - */ - if (drctv.containsKey("cache")) { - assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean - if (drctv["cache"] instanceof CharSequence) { - assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" - } - } - - /* DIRECTIVE conda - accepted examples: - - "bwa=0.7.15" - - "bwa=0.7.15 fastqc=0.11.5" - - ["bwa=0.7.15", "fastqc=0.11.5"] - */ - if (drctv.containsKey("conda")) { - if (drctv["conda"] instanceof List) { - drctv["conda"] = drctv["conda"].join(" ") - } - assert drctv["conda"] instanceof CharSequence - } - - /* DIRECTIVE container - accepted examples: - - "foo/bar:tag" - - [ registry: "reg", image: "im", tag: "ta" ] - is transformed to "reg/im:ta" - - [ image: "im" ] - is transformed to "im:latest" - */ - if (drctv.containsKey("container")) { - assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence - if (drctv["container"] instanceof Map) { - def m = drctv["container"] - assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") - def part1 = - System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : - params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? - m.registry ? m.registry + "/" : - "" - def part2 = m.image - def part3 = m.tag ? 
":" + m.tag : ":latest" - drctv["container"] = part1 + part2 + part3 - } - } - - /* DIRECTIVE containerOptions - accepted examples: - - "--foo bar" - - ["--foo bar", "-f b"] - */ - if (drctv.containsKey("containerOptions")) { - if (drctv["containerOptions"] instanceof List) { - drctv["containerOptions"] = drctv["containerOptions"].join(" ") - } - assert drctv["containerOptions"] instanceof CharSequence - } - - /* DIRECTIVE cpus - accepted examples: - - 1 - - 10 - */ - if (drctv.containsKey("cpus")) { - assert drctv["cpus"] instanceof Integer - } - - /* DIRECTIVE disk - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("disk")) { - assert drctv["disk"] instanceof CharSequence - // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE echo - accepted examples: - - true - - false - */ - if (drctv.containsKey("echo")) { - assert drctv["echo"] instanceof Boolean - } - - /* DIRECTIVE errorStrategy - accepted examples: - - "terminate" - - "finish" - */ - if (drctv.containsKey("errorStrategy")) { - assert drctv["errorStrategy"] instanceof CharSequence - assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" - } - - /* DIRECTIVE executor - accepted examples: - - "local" - - "sge" - */ - if (drctv.containsKey("executor")) { - assert drctv["executor"] instanceof CharSequence - assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" - } - - /* DIRECTIVE machineType - accepted examples: - - "n1-highmem-8" - */ - if (drctv.containsKey("machineType")) { - assert drctv["machineType"] instanceof CharSequence - } - - /* DIRECTIVE maxErrors - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxErrors")) { - assert drctv["maxErrors"] instanceof Integer - } - - /* DIRECTIVE 
maxForks - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxForks")) { - assert drctv["maxForks"] instanceof Integer - } - - /* DIRECTIVE maxRetries - accepted examples: - - 1 - - 3 - */ - if (drctv.containsKey("maxRetries")) { - assert drctv["maxRetries"] instanceof Integer - } - - /* DIRECTIVE memory - accepted examples: - - "1 GB" - - "2TB" - - "3.2KB" - - "10.B" - */ - if (drctv.containsKey("memory")) { - assert drctv["memory"] instanceof CharSequence - // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") - // ^ does not allow closures - } - - /* DIRECTIVE module - accepted examples: - - "ncbi-blast/2.2.27" - - "ncbi-blast/2.2.27:t_coffee/10.0" - - ["ncbi-blast/2.2.27", "t_coffee/10.0"] - */ - if (drctv.containsKey("module")) { - if (drctv["module"] instanceof List) { - drctv["module"] = drctv["module"].join(":") - } - assert drctv["module"] instanceof CharSequence - } - - /* DIRECTIVE penv - accepted examples: - - "smp" - */ - if (drctv.containsKey("penv")) { - assert drctv["penv"] instanceof CharSequence - } - - /* DIRECTIVE pod - accepted examples: - - [ label: "key", value: "val" ] - - [ annotation: "key", value: "val" ] - - [ env: "key", value: "val" ] - - [ [label: "l", value: "v"], [env: "e", value: "v"]] - */ - if (drctv.containsKey("pod")) { - if (drctv["pod"] instanceof Map) { - drctv["pod"] = [ drctv["pod"] ] - } - assert drctv["pod"] instanceof List - drctv["pod"].forEach { pod -> - assert pod instanceof Map - // TODO: should more checks be added? - // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod - // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
- } - } - - /* DIRECTIVE publishDir - accepted examples: - - [] - - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] - - "/path/to/dir" - is transformed to [[ path: "/path/to/dir" ]] - - [ path: "/path/to/dir", mode: "cache" ] - is transformed to [[ path: "/path/to/dir", mode: "cache" ]] - */ - // TODO: should we also look at params["publishDir"]? - if (drctv.containsKey("publishDir")) { - def pblsh = drctv["publishDir"] - - // check different options - assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence - - // turn into list if not already so - // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. - pblsh = pblsh instanceof List ? pblsh : [ pblsh ] - - // check elements of publishDir - pblsh = pblsh.collect{ elem -> - // turn into map if not already so - elem = elem instanceof CharSequence ? [ path: elem ] : elem - - // check types and keys - assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" - assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") - - // check elements in map - assert elem.containsKey("path") - assert elem["path"] instanceof CharSequence - if (elem.containsKey("mode")) { - assert elem["mode"] instanceof CharSequence - assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] - } - if (elem.containsKey("overwrite")) { - assert elem["overwrite"] instanceof Boolean - } - if (elem.containsKey("pattern")) { - assert elem["pattern"] instanceof CharSequence - } - if (elem.containsKey("saveAs")) { - assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" - } - if (elem.containsKey("enabled")) { - assert elem["enabled"] instanceof Boolean - } - - // return final result - elem - } - // store final directive - drctv["publishDir"] = pblsh - } - - /* DIRECTIVE queue - accepted examples: - - "long" - - "short,long" - - ["short", "long"] - */ - if (drctv.containsKey("queue")) { - if (drctv["queue"] instanceof List) { - drctv["queue"] = drctv["queue"].join(",") - } - assert drctv["queue"] instanceof CharSequence - } - - /* DIRECTIVE label - accepted examples: - - "big_mem" - - "big_cpu" - - ["big_mem", "big_cpu"] - */ - if (drctv.containsKey("label")) { - if (drctv["label"] instanceof CharSequence) { - drctv["label"] = [ drctv["label"] ] - } - assert drctv["label"] instanceof List - drctv["label"].forEach { label -> - assert label instanceof CharSequence - // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") - // ^ does not allow closures - } - } - - /* DIRECTIVE scratch - accepted examples: - - true - - "/path/to/scratch" - - '$MY_PATH_TO_SCRATCH' - - "ram-disk" - */ - if (drctv.containsKey("scratch")) { - assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence - } - - /* DIRECTIVE storeDir - accepted examples: - - "/path/to/storeDir" - */ - if (drctv.containsKey("storeDir")) { - assert drctv["storeDir"] instanceof CharSequence - } - - /* DIRECTIVE stageInMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageInMode")) { - assert drctv["stageInMode"] instanceof CharSequence - assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] - } - - /* DIRECTIVE stageOutMode - accepted examples: - - "copy" - - "link" - */ - if (drctv.containsKey("stageOutMode")) { - assert drctv["stageOutMode"] instanceof CharSequence - assert drctv["stageOutMode"] in ["copy", "move", "rsync"] - } - - /* DIRECTIVE tag - accepted examples: - - "foo" - - '$id' - */ - if (drctv.containsKey("tag")) { - assert drctv["tag"] instanceof CharSequence - } - - /* 
DIRECTIVE time - accepted examples: - - "1h" - - "2days" - - "1day 6hours 3minutes 30seconds" - */ - if (drctv.containsKey("time")) { - assert drctv["time"] instanceof CharSequence - // todo: validation regex? - } - - return drctv -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' -def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { - // override defaults with args - def workflowArgs = defaultWfArgs + args - - // check whether 'key' exists - assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" - - // if 'key' is a closure, apply it to the original key - if (workflowArgs["key"] instanceof Closure) { - workflowArgs["key"] = workflowArgs["key"](meta.config.name) - } - def key = workflowArgs["key"] - assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" - assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" - - // check for any unexpected keys - def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] - def unexpectedKeys = workflowArgs.keySet() - expectedKeys - assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" - assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" - workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) - - // check whether directives exists and apply defaults - assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" - assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" - workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) - - // auto define publish, if so desired - if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + - // " Example: params.publish_dir = \"./output/\"" - def publishDir = getPublishDir() - - if (publishDir != null) { - workflowArgs.directives.publishDir = [[ - path: publishDir, - saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default - mode: "copy" - ]] - } - } - - // auto define transcript, if so desired - if (workflowArgs.auto.transcript == true) { - // can't assert at this level thanks to the no_publish profile - // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : - // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + - // " Example: params.transcripts_dir = \"./transcripts/\"" - def transcriptsDir = - params.containsKey("transcripts_dir") ? params.transcripts_dir : - params.containsKey("transcriptsDir") ? 
params.transcriptsDir : - params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : - params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : - null - if (transcriptsDir != null) { - def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') - def transcriptsPublishDir = [ - path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", - saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", - mode: "copy" - ] - def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] - workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir - } - } - - // if this is a stubrun, remove certain directives? - if (workflow.stubRun) { - workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) - } - - for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { - assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" - } - } - - // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? - for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { - if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { - log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
- } - } - - // check fromState - workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) - - // check toState - workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) - - // return output - return workflowArgs -} - -def _processFromState(fromState, key_, config_) { - assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : - "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" - if (fromState == null) { - return null - } - - // if fromState is a List, convert to map - if (fromState instanceof List) { - // check whether fromstate is a list[string] - assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" - fromState = fromState.collectEntries{[it, it]} - } - - // if fromState is a map, convert to closure - if (fromState instanceof Map) { - // check whether fromstate is a map[string, string] - assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" - assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" - def fromStateMap = fromState.clone() - def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} - // turn the map into a closure to be used later on - fromState = { it -> - def state = it[1] - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def data = fromStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (state.containsKey(origkey)) { - [[newkey, state[origkey]]] - } else if (!requiredInputNames.contains(origkey)) { - [] - } else { - throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") - } - }.collectEntries() - data - } - } - - return fromState -} - -def _processToState(toState, key_, config_) { - if (toState == null) { - toState = { tup -> tup[1] } - } - - // toState should be a closure, map[string, string], or list[string] - assert toState instanceof Closure || toState instanceof Map || toState instanceof List : - "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" - - // if toState is a List, convert to map - if (toState instanceof List) { - // check whether toState is a list[string] - assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" - toState = toState.collectEntries{[it, it]} - } - - // if toState is a map, convert to closure - if (toState instanceof Map) { - // check whether toState is a map[string, string] - assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" - assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" - def toStateMap = toState.clone() - def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} - // turn the map into a closure to be used later on - toState = { it -> - def output = it[1] - def state = it[2] - assert output instanceof Map : "Error in module '$key_': the output is not a Map" - assert state instanceof Map : "Error in module '$key_': the state is not a Map" - def extraEntries = toStateMap.collectMany{newkey, origkey -> - // check whether newkey corresponds to a required argument - if (output.containsKey(origkey)) { - [[newkey, output[origkey]]] - } else if (!requiredOutputNames.contains(origkey)) { - [] - } else { - throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") - } - }.collectEntries() - state + extraEntries - } - } - - return toState -} - -// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' -def _debug(workflowArgs, debugKey) { - if (workflowArgs.debug) { - view { "process '${workflowArgs.key}' $debugKey tuple: $it" } - } else { - map { it } - } -} - -// depends on: innerWorkflowFactory -def workflowFactory(Map args, Map defaultWfArgs, Map meta) { - def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) - def key_ = workflowArgs["key"] - - workflow workflowInstance { - take: input_ - - main: - def chModified = input_ - | checkUniqueIds([:]) - | _debug(workflowArgs, "input") - | map { tuple -> - tuple = deepClone(tuple) - - if (workflowArgs.map) { - tuple = workflowArgs.map(tuple) - } - if (workflowArgs.mapId) { - tuple[0] = workflowArgs.mapId(tuple[0]) - } - if (workflowArgs.mapData) { - tuple[1] = workflowArgs.mapData(tuple[1]) - } - if (workflowArgs.mapPassthrough) { - tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) - } - - // check tuple - assert tuple instanceof List : - "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" - assert tuple.size() >= 2 : - "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: tuple.size() == ${tuple.size()}" - - // check id field - if (tuple[0] instanceof GString) { - tuple[0] = tuple[0].toString() - } - assert tuple[0] instanceof CharSequence : - "Error in module '${key_}': first element of tuple in channel should be a String\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Found: ${tuple[0]}" - - // match file to input file - if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { - def inputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - - assert inputFiles.size() == 1 : - "Error in module '${key_}' id '${tuple[0]}'.\n" + - " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + - " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" - - tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() - } - - // check data field - assert tuple[1] instanceof Map : - "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + - " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + - " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // rename keys of data field in tuple - if (workflowArgs.renameKeys) { - assert workflowArgs.renameKeys instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" - assert tuple[1] instanceof Map : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" - - // TODO: allow renameKeys to be a function? - workflowArgs.renameKeys.each { newKey, oldKey -> - assert newKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" - assert oldKey instanceof CharSequence : - "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + - " Example: renameKeys: ['new_key': 'old_key'].\n" + - " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" - assert tuple[1].containsKey(oldKey) : - "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + - " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" - tuple[1].put(newKey, tuple[1][oldKey]) - } - tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) - } - tuple - } - - - def chRun = null - def chPassthrough = null - if (workflowArgs.runIf) { - def runIfBranch = chModified.branch{ tup -> - run: workflowArgs.runIf(tup[0], tup[1]) - passthrough: true - } - chRun = runIfBranch.run - chPassthrough = runIfBranch.passthrough - } else { - chRun = chModified - chPassthrough = Channel.empty() - } - - def chRunFiltered = workflowArgs.filter ? - chRun | filter{workflowArgs.filter(it)} : - chRun - - def chArgs = workflowArgs.fromState ? 
- chRunFiltered | map{ - def new_data = workflowArgs.fromState(it.take(2)) - [it[0], new_data] - } : - chRunFiltered | map {tup -> tup.take(2)} - - // fill in defaults - def chArgsWithDefaults = chArgs - | map { tuple -> - def id_ = tuple[0] - def data_ = tuple[1] - - // TODO: could move fromState to here - - // fetch default params from functionality - def defaultArgs = meta.config.allArguments - .findAll { it.containsKey("default") } - .collectEntries { [ it.plainName, it.default ] } - - // fetch overrides in params - def paramArgs = meta.config.allArguments - .findAll { par -> - def argKey = key_ + "__" + par.plainName - params.containsKey(argKey) - } - .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } - - // fetch overrides in data - def dataArgs = meta.config.allArguments - .findAll { data_.containsKey(it.plainName) } - .collectEntries { [ it.plainName, data_[it.plainName] ] } - - // combine params - def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs - - // remove arguments with explicit null values - combinedArgs - .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} - - combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) - - [id_, combinedArgs] + tuple.drop(2) - } - - // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. - def chInitialOutput = chArgsWithDefaults - | _debug(workflowArgs, "processed") - // run workflow - | innerWorkflowFactory(workflowArgs) - // check output tuple - | map { id_, output_ -> - - // see if output map contains metadata - def meta_ = - output_ instanceof Map && output_.containsKey("_meta") ? 
- output_["_meta"] : - [:] - def join_id = meta_.join_id ?: id_ - - // remove metadata - output_ = output_.findAll{k, v -> k != "_meta"} - - // check value types - output_ = _processOutputValues(output_, meta.config, id_, key_) - - // simplify output if need be - if (workflowArgs.auto.simplifyOutput && output_.size() == 1) { - output_ = output_.values()[0] - } - - [join_id, id_, output_] - } - // | view{"chInitialOutput: ${it.take(3)}"} - - // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] - def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_) - // input tuple format: [join_id, id, output, prev_state, ...] - // output tuple format: [join_id, id, new_state, ...] - | map{ tup -> - def new_state = workflowArgs.toState(tup.drop(1).take(3)) - tup.take(2) + [new_state] + tup.drop(4) - } - - if (workflowArgs.auto.publish == "state") { - def chPublish = chNewState - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [join_id, id, new_state] - | map{ tup -> - tup.take(3) - } - - safeJoin(chPublish, chArgsWithDefaults, key_) - // input tuple format: [join_id, id, new_state, orig_state, ...] - // output tuple format: [id, new_state, orig_state] - | map { tup -> - tup.drop(1).take(3) - } - | publishStatesByConfig(key: key_, config: meta.config) - } - - // remove join_id and meta - chReturn = chNewState - | map { tup -> - // input tuple format: [join_id, id, new_state, ...] - // output tuple format: [id, new_state, ...] 
- tup.drop(1) - } - | _debug(workflowArgs, "output") - | concat(chPassthrough) - - emit: chReturn - } - - def wf = workflowInstance.cloneWithName(key_) - - // add factory function - wf.metaClass.run = { runArgs -> - workflowFactory(runArgs, workflowArgs, meta) - } - // add config to module for later introspection - wf.metaClass.config = meta.config - - return wf -} - -nextflow.enable.dsl=2 - -// START COMPONENT-SPECIFIC CODE - -// create meta object -meta = [ - "resources_dir": moduleDir.toRealPath().normalize(), - "config": processConfig(readJsonBlob('''{ - "name" : "umitools_prepareforquant", - "version" : "main", - "argument_groups" : [ - { - "name" : "Input", - "arguments" : [ - { - "type" : "file", - "name" : "--bam", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - } - ] - }, - { - "name" : "Output", - "arguments" : [ - { - "type" : "file", - "name" : "--output", - "default" : [ - "$id.transcriptome_sorted.bam" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "file", - "name" : "--log", - "default" : [ - "$id.$key.log" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "output", - "multiple" : false, - "multiple_sep" : ";" - } - ] - } - ], - "resources" : [ - { - "type" : "bash_script", - "path" : "script.sh", - "is_executable" : true - }, - { - "type" : "file", - "path" : "prepare-for-rsem.py" - } - ], - "description" : "Fix paired-end reads in name sorted BAM file to prepare for salmon quantification", - "info" : { - "migration_info" : { - "git_repo" : "https://github.com/nf-core/rnaseq.git", - "paths" : [ - "modules/local/umitools_prepareforrsem.nf" - ], - "last_sha" : "0a1bdcfbb498987643b74e9fccab85ccd9f2a17d" - } - }, - "status" : "enabled", - "requirements" : { - "commands" : [ - "ps" - ] - }, - 
"repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "runners" : [ - { - "type" : "executable", - "id" : "executable", - "docker_setup_strategy" : "ifneedbepullelsecachedbuild" - }, - { - "type" : "nextflow", - "id" : "nextflow", - "directives" : { - "tag" : "$id" - }, - "auto" : { - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false - }, - "config" : { - "labels" : { - "mem1gb" : "memory = 1000000000.B", - "mem2gb" : "memory = 2000000000.B", - "mem5gb" : "memory = 5000000000.B", - "mem10gb" : "memory = 10000000000.B", - "mem20gb" : "memory = 20000000000.B", - "mem50gb" : "memory = 50000000000.B", - "mem100gb" : "memory = 100000000000.B", - "mem200gb" : "memory = 200000000000.B", - "mem500gb" : "memory = 500000000000.B", - "mem1tb" : "memory = 1000000000000.B", - "mem2tb" : "memory = 2000000000000.B", - "mem5tb" : "memory = 5000000000000.B", - "mem10tb" : "memory = 10000000000000.B", - "mem20tb" : "memory = 20000000000000.B", - "mem50tb" : "memory = 50000000000000.B", - "mem100tb" : "memory = 100000000000000.B", - "mem200tb" : "memory = 200000000000000.B", - "mem500tb" : "memory = 500000000000000.B", - "mem1gib" : "memory = 1073741824.B", - "mem2gib" : "memory = 2147483648.B", - "mem4gib" : "memory = 4294967296.B", - "mem8gib" : "memory = 8589934592.B", - "mem16gib" : "memory = 17179869184.B", - "mem32gib" : "memory = 34359738368.B", - "mem64gib" : "memory = 68719476736.B", - "mem128gib" : "memory = 137438953472.B", - "mem256gib" : "memory = 274877906944.B", - "mem512gib" : "memory = 549755813888.B", - "mem1tib" : "memory = 1099511627776.B", - "mem2tib" : "memory = 2199023255552.B", - "mem4tib" : "memory = 4398046511104.B", - "mem8tib" : "memory = 8796093022208.B", - "mem16tib" : "memory = 17592186044416.B", - "mem32tib" : "memory = 35184372088832.B", - "mem64tib" : 
"memory = 70368744177664.B", - "mem128tib" : "memory = 140737488355328.B", - "mem256tib" : "memory = 281474976710656.B", - "mem512tib" : "memory = 562949953421312.B", - "cpu1" : "cpus = 1", - "cpu2" : "cpus = 2", - "cpu5" : "cpus = 5", - "cpu10" : "cpus = 10", - "cpu20" : "cpus = 20", - "cpu50" : "cpus = 50", - "cpu100" : "cpus = 100", - "cpu200" : "cpus = 200", - "cpu500" : "cpus = 500", - "cpu1000" : "cpus = 1000" - } - }, - "debug" : false, - "container" : "docker" - } - ], - "engines" : [ - { - "type" : "docker", - "id" : "docker", - "image" : "ubuntu:22.04", - "target_registry" : "images.viash-hub.com", - "target_tag" : "main", - "namespace_separator" : "/", - "setup" : [ - { - "type" : "apt", - "packages" : [ - "pip" - ], - "interactive" : false - }, - { - "type" : "python", - "user" : false, - "packages" : [ - "umi_tools", - "pysam" - ], - "upgrade" : true - } - ] - }, - { - "type" : "native", - "id" : "native" - } - ], - "build_info" : { - "config" : "/workdir/root/repo/src/umitools_prepareforquant/config.vsh.yaml", - "runner" : "nextflow", - "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/umitools_prepareforquant", - "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" - }, - "package_config" : { - "name" : "rnaseq", - "version" : "main", - "info" : { - "test_resources" : [ - { - "path" : "gs://viash-hub-test-data/rnaseq/v1", - "dest" : "testData" - } - ] - }, - "repositories" : [ - { - "type" : "vsh", - "name" : "biobox", - "repo" : "vsh/biobox", - "tag" : "main" - }, - { - "type" : "vsh", - "name" : "craftbox", - "repo" : "craftbox", - "tag" : "v0.1.0" - } - ], - "viash_version" : "0.9.0", - "source" : "/workdir/root/repo/src", - "target" : "/workdir/root/repo/target", - "config_mods" : [ - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n", 
- ".engines += { type: \\"native\\" }", - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", - ".engines[.type == 'docker'].target_tag := 'main'" - ], - "organization" : "vsh" - } -}''')) -] - -// resolve dependencies dependencies (if any) - - -// inner workflow -// inner workflow hook -def innerWorkflowFactory(args) { - def rawScript = '''set -e -tempscript=".viash_script.sh" -cat > "$tempscript" << VIASHMAIN -## VIASH START -# The following code has been auto-generated by Viash. -$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi ) -$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi ) -$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi ) -$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi ) -$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi ) -$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi ) -$( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "${VIASH_META_EXECUTABLE}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_executable='&'#" ; else echo "# meta_executable="; fi ) -$( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "${VIASH_META_CONFIG}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_config='&'#" ; else echo "# meta_config="; fi ) -$( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "${VIASH_META_TEMP_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_temp_dir='&'#" ; else echo "# meta_temp_dir="; fi ) -$( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "${VIASH_META_CPUS}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_cpus='&'#" ; else echo "# meta_cpus="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "${VIASH_META_MEMORY_B}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_b='&'#" ; else echo "# meta_memory_b="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "${VIASH_META_MEMORY_KB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kb='&'#" ; else echo "# meta_memory_kb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "${VIASH_META_MEMORY_MB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mb='&'#" ; else echo "# meta_memory_mb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "${VIASH_META_MEMORY_GB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gb='&'#" ; else echo "# meta_memory_gb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "${VIASH_META_MEMORY_TB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tb='&'#" ; else echo "# meta_memory_tb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "${VIASH_META_MEMORY_PB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pb='&'#" ; else echo "# meta_memory_pb="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "${VIASH_META_MEMORY_KIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_kib='&'#" ; else echo "# meta_memory_kib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "${VIASH_META_MEMORY_MIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_mib='&'#" ; else echo "# meta_memory_mib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "${VIASH_META_MEMORY_GIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_gib='&'#" ; else echo "# meta_memory_gib="; fi ) -$( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_tib='&'#" ; else echo "# meta_memory_tib="; fi ) -$( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi ) - -## VIASH END -#!/bin/bash - -set -eo pipefail - -python3 "\\$meta_resources_dir/prepare-for-rsem.py" \\\\ - --stdin=\\$par_bam \\\\ - --stdout=\\$par_output \\\\ - --log=\\$par_log -VIASHMAIN -bash "$tempscript" -''' - - return vdsl3WorkflowFactory(args, meta, rawScript) -} - - - -/** - * Generate a workflow for VDSL3 modules. - * - * This function is called by the workflowFactory() function. - * - * Input channel: [id, input_map] - * Output channel: [id, output_map] - * - * Internally, this workflow will convert the input channel - * to a format which the Nextflow module will be able to handle. - */ -def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { - def key = args["key"] - def processObj = null - - workflow processWf { - take: input_ - main: - - if (processObj == null) { - processObj = _vdsl3ProcessFactory(args, meta, rawScript) - } - - output_ = input_ - | map { tuple -> - def id = tuple[0] - def data_ = tuple[1] - - if (workflow.stubRun) { - // add id if missing - data_ = [id: 'stub'] + data_ - } - - // process input files separately - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "input" } - .collect { par -> - def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] - def inputFiles = [] - if (val == null) { - inputFiles = [] - } else if (val instanceof List) { - inputFiles = val - } else if (val instanceof Path) { - inputFiles = [ val ] - } else { - inputFiles = [] - } - if (!workflow.stubRun) { - // throw error when an input file doesn't exist - inputFiles.each{ file -> - assert file.exists() : - "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + - " Required input file does not exist.\n" + - " Path: '$file'.\n" + - " Expected input file to exist" - } - } - inputFiles - } - - // remove input files - def argsExclInputFiles = meta.config.allArguments - .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } - .collectEntries { par -> - def parName = par.plainName - def val = data_[parName] - if (par.multiple && val instanceof Collection) { - val = val.join(par.multiple_sep) - } - if (par.direction == "output" && par.type == "file") { - val = val - .replaceAll('\\$id', id) - .replaceAll('\\$\\{id\\}', id) - .replaceAll('\\$key', key) - .replaceAll('\\$\\{key\\}', key) - } - [parName, val] - } - - [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] - } - | processObj - | map { output -> - def outputFiles = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .indexed() - .collectEntries{ index, par -> - def out = output[index + 1] - // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) - if (!out instanceof List || out.size() <= 1) { - if (par.multiple) { - out = [] - } else { - assert !par.required : - "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + - " Required output file is missing" - out = null - } - } else if (out.size() == 2 && !par.multiple) { - out = out[1] - } else { - out = out.drop(1) - } - [ par.plainName, out ] - } - - // drop null outputs - outputFiles.removeAll{it.value == null} - - [ output[0], outputFiles ] - } - emit: output_ - 
} - - return processWf -} - -// depends on: session? -def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { - // autodetect process key - def wfKey = workflowArgs["key"] - def procKeyPrefix = "${wfKey}_process" - def scriptMeta = nextflow.script.ScriptMeta.current() - def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} - def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} - def newNumber = (numbers + [-1]).max() + 1 - - def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" - - if (newNumber > 0) { - log.warn "Key for module '${wfKey}' is duplicated.\n", - "If you run a component multiple times in the same workflow,\n" + - "it's recommended you set a unique key for every call,\n" + - "for example: ${wfKey}.run(key: \"foo\")." - } - - // subset directives and convert to list of tuples - def drctv = workflowArgs.directives - - // TODO: unit test the two commands below - // convert publish array into tags - def valueToStr = { val -> - // ignore closures - if (val instanceof CharSequence) { - if (!val.matches('^[{].*[}]$')) { - '"' + val + '"' - } else { - val - } - } else if (val instanceof List) { - "[" + val.collect{valueToStr(it)}.join(", ") + "]" - } else if (val instanceof Map) { - "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" - } else { - val.inspect() - } - } - - // multiple entries allowed: label, publishdir - def drctvStrs = drctv.collect { key, value -> - if (key in ["label", "publishDir"]) { - value.collect{ val -> - if (val instanceof Map) { - "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else if (val == null) { - "" - } else { - "\n$key " + valueToStr(val) - } - }.join() - } else if (value instanceof Map) { - "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") - } else { - "\n$key " + valueToStr(value) - } - }.join() - - def inputPaths = meta.config.allArguments - .findAll { it.type == "file" 
&& it.direction == "input" } - .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } - .join() - - def outputPaths = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - // insert dummy into every output (see nextflow-io/nextflow#2678) - if (!par.multiple) { - ', path{[".exitcode", args.' + par.plainName + ']}' - } else { - ', path{[".exitcode"] + args.' + par.plainName + '}' - } - } - .join() - - // TODO: move this functionality somewhere else? - if (workflowArgs.auto.transcript) { - outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' - } else { - outputPaths = outputPaths + ', path{[".exitcode"]}' - } - - // create dirs for output files (based on BashWrapper.createParentFiles) - def createParentStr = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" && it.create_parent } - .collect { par -> - def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" - "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" - } - .join("\n") - - // construct inputFileExports - def inputFileExports = meta.config.allArguments - .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } - .collect { par -> - def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" - "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" - } - - // NOTE: if using docker, use /tmp instead of tmpDir! 
- def tmpDir = java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('VIASH_TMPDIR') ?: - System.getenv('VIASH_TEMPDIR') ?: - System.getenv('VIASH_TMP') ?: - System.getenv('TEMP') ?: - System.getenv('TMPDIR') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMP') ?: - '/tmp' - ).toAbsolutePath() - - // construct stub - def stub = meta.config.allArguments - .findAll { it.type == "file" && it.direction == "output" } - .collect { par -> - "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" - } - .join("\n") - - // escape script - def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') - - // publishdir assert - def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? - """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : - "" - - // generate process string - def procStr = - """nextflow.enable.dsl=2 - | - |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } - |process $procKey {$drctvStrs - |input: - | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") - |output: - | tuple val("\$id")$outputPaths, optional: true - |stub: - |\"\"\" - |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } - |$stub - |\"\"\" - |script:$assertStr - |def parInject = args - | .findAll{key, value -> value != null} - | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} - | .join("\\n") - |\"\"\" - |# meta exports - |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" - |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" - |export VIASH_META_NAME="${meta.config.name}" - |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" - |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" - |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } - |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } - |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then - | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) - | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) - | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) - | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) - | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) - | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) - | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) - | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) - | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) - | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) - |fi - | - |# meta synonyms - |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" - |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" - | - |# create output dirs if need be - |function mkdir_parent { - | for file in "\\\$@"; do - | mkdir -p "\\\$(dirname "\\\$file")" - | done - |} - |$createParentStr - | - |# argument exports${inputFileExports.join()} - |\$parInject - | - |# process script - |${escapedScript} - |\"\"\" - |} - |""".stripMargin() - - // TODO: print on debug - // if (workflowArgs.debug == true) { - // println("######################\n$procStr\n######################") - // } - - // write process to temp file - def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") - addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } - tempFile.text = procStr - - // create process from temp file - def binding = new nextflow.script.ScriptBinding([:]) - def session = nextflow.Nextflow.getSession() - def parser = new nextflow.script.ScriptParser(session) - .setModule(true) - .setBinding(binding) - def moduleScript = parser.runScript(tempFile) - .getScript() - - // register module in meta - def module = new nextflow.script.IncludeDef.Module(name: procKey) - scriptMeta.addModule(moduleScript, module.name, module.alias) - - // retrieve and return process from meta - return scriptMeta.getProcess(procKey) -} - -// defaults -meta["defaults"] = [ - // key to be used to trace the process and determine output names - key: null, - - // fixed arguments to be passed to script - args: [:], - - // default directives - directives: readJsonBlob('''{ - "container" : { - "registry" : "images.viash-hub.com", - "image" : "vsh/rnaseq/umitools_prepareforquant", - "tag" : "main" - }, - "tag" : "$id" -}'''), - - // auto settings - auto: readJsonBlob('''{ - "simplifyInput" : true, - "simplifyOutput" : false, - "transcript" : false, - "publish" : false -}'''), - - // Apply a map over the incoming tuple - // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` - map: null, - - // Apply a map over the ID element of a tuple (i.e. the first element) - // Example: `{ id -> id + "_foo" }` - mapId: null, - - // Apply a map over the data element of a tuple (i.e. the second element) - // Example: `{ data -> [ input: data.output ] }` - mapData: null, - - // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) - // Example: `{ pt -> pt.drop(1) }` - mapPassthrough: null, - - // Filter the channel - // Example: `{ tup -> tup[0] == "foo" }` - filter: null, - - // Choose whether or not to run the component on the tuple if the condition is true. - // Otherwise, the tuple will be passed through. 
- // Example: `{ tup -> tup[0] != "skip_this" }` - runIf: null, - - // Rename keys in the data field of the tuple (i.e. the second element) - // Will likely be deprecated in favour of `fromState`. - // Example: `[ "new_key": "old_key" ]` - renameKeys: null, - - // Fetch data from the state and pass it to the module without altering the current state. - // - // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be passed to the module as is. - // - If it is a `List[String]`, the data will be the values of the state at the given keys. - // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. - // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. - // - // Example: `{ id, state -> [input: state.fastq_file] }` - // Default: `null` - fromState: null, - - // Determine how the state should be updated after the module has been run. - // - // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. - // - // - If it is `null`, the state will be replaced with the output of the module. - // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. - // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. - // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
- // - // Example: `{ id, output, state -> state + [counts: state.output] }` - // Default: `{ id, output, state -> output }` - toState: null, - - // Whether or not to print debug messages - // Default: `false` - debug: false -] - -// initialise default workflow -meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) - -// add workflow to environment -nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) - -// anonymous workflow for running this module as a standalone -workflow { - // add id argument if it's not already in the config - // TODO: deep copy - def newConfig = deepClone(meta.config) - def newParams = deepClone(params) - - def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} - if (!argsContainsId) { - def idArg = [ - 'name': '--id', - 'required': false, - 'type': 'string', - 'description': 'A unique id for every entry.', - 'multiple': false - ] - newConfig.arguments.add(0, idArg) - newConfig = processConfig(newConfig) - } - if (!newParams.containsKey("id")) { - newParams.id = "run" - } - - helpMessage(newConfig) - - channelFromParams(newParams, newConfig) - // make sure id is not in the state if id is not in the args - | map {id, state -> - if (!argsContainsId) { - [id, state.findAll{k, v -> k != "id"}] - } else { - [id, state] - } - } - | meta.workflow.run( - auto: [ publish: "state" ] - ) -} - -// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/umitools_prepareforquant/nextflow.config b/target/nextflow/umitools_prepareforquant/nextflow.config deleted file mode 100644 index 717fc6b..0000000 --- a/target/nextflow/umitools_prepareforquant/nextflow.config +++ /dev/null @@ -1,125 +0,0 @@ -manifest { - name = 'umitools_prepareforquant' - mainScript = 'main.nf' - nextflowVersion = '!>=20.12.1-edge' - version = 'main' - description = 'Fix paired-end reads in name sorted BAM file to prepare for salmon quantification' -} - -process.container = 'nextflow/bash:latest' - -// detect tempdir -tempDir = 
java.nio.file.Paths.get( - System.getenv('NXF_TEMP') ?: - System.getenv('VIASH_TEMP') ?: - System.getenv('TEMPDIR') ?: - System.getenv('TMPDIR') ?: - '/tmp' -).toAbsolutePath() - -profiles { - no_publish { - process { - withName: '.*' { - publishDir = [ - enabled: false - ] - } - } - } - mount_temp { - docker.temp = tempDir - podman.temp = tempDir - charliecloud.temp = tempDir - } - docker { - docker.enabled = true - // docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } -} - -process{ - withLabel: mem1gb { memory = 1000000000.B } - withLabel: mem2gb { memory = 2000000000.B } - withLabel: mem5gb { memory = 5000000000.B } - withLabel: mem10gb { memory = 10000000000.B } - withLabel: mem20gb { memory = 20000000000.B } - withLabel: mem50gb { memory = 50000000000.B } - withLabel: mem100gb { memory = 100000000000.B } - withLabel: mem200gb { memory = 200000000000.B } - withLabel: mem500gb { memory = 500000000000.B } - withLabel: mem1tb { memory = 1000000000000.B } - withLabel: mem2tb { memory = 2000000000000.B } - withLabel: mem5tb { memory = 5000000000000.B } - withLabel: mem10tb { memory = 10000000000000.B } - withLabel: mem20tb { memory = 20000000000000.B } - withLabel: mem50tb { memory = 50000000000000.B } - withLabel: mem100tb { memory = 
100000000000000.B } - withLabel: mem200tb { memory = 200000000000000.B } - withLabel: mem500tb { memory = 500000000000000.B } - withLabel: mem1gib { memory = 1073741824.B } - withLabel: mem2gib { memory = 2147483648.B } - withLabel: mem4gib { memory = 4294967296.B } - withLabel: mem8gib { memory = 8589934592.B } - withLabel: mem16gib { memory = 17179869184.B } - withLabel: mem32gib { memory = 34359738368.B } - withLabel: mem64gib { memory = 68719476736.B } - withLabel: mem128gib { memory = 137438953472.B } - withLabel: mem256gib { memory = 274877906944.B } - withLabel: mem512gib { memory = 549755813888.B } - withLabel: mem1tib { memory = 1099511627776.B } - withLabel: mem2tib { memory = 2199023255552.B } - withLabel: mem4tib { memory = 4398046511104.B } - withLabel: mem8tib { memory = 8796093022208.B } - withLabel: mem16tib { memory = 17592186044416.B } - withLabel: mem32tib { memory = 35184372088832.B } - withLabel: mem64tib { memory = 70368744177664.B } - withLabel: mem128tib { memory = 140737488355328.B } - withLabel: mem256tib { memory = 281474976710656.B } - withLabel: mem512tib { memory = 562949953421312.B } - withLabel: cpu1 { cpus = 1 } - withLabel: cpu2 { cpus = 2 } - withLabel: cpu5 { cpus = 5 } - withLabel: cpu10 { cpus = 10 } - withLabel: cpu20 { cpus = 20 } - withLabel: cpu50 { cpus = 50 } - withLabel: cpu100 { cpus = 100 } - withLabel: cpu200 { cpus = 200 } - withLabel: cpu500 { cpus = 500 } - withLabel: cpu1000 { cpus = 1000 } -} - - diff --git a/target/nextflow/umitools_prepareforquant/nextflow_schema.json b/target/nextflow/umitools_prepareforquant/nextflow_schema.json deleted file mode 100644 index 5993e05..0000000 --- a/target/nextflow/umitools_prepareforquant/nextflow_schema.json +++ /dev/null @@ -1,106 +0,0 @@ -{ -"$schema": "http://json-schema.org/draft-07/schema", -"title": "umitools_prepareforquant", -"description": "Fix paired-end reads in name sorted BAM file to prepare for salmon quantification", -"type": "object", -"definitions": { - - - 
- "input" : { - "title": "Input", - "type": "object", - "description": "No description", - "properties": { - - - "bam": { - "type": - "string", - "description": "Type: `file`. ", - "help_text": "Type: `file`. " - - } - - -} -}, - - - "output" : { - "title": "Output", - "type": "object", - "description": "No description", - "properties": { - - - "output": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.output.bam`. ", - "help_text": "Type: `file`, default: `$id.$key.output.bam`. " - , - "default":"$id.$key.output.bam" - } - - - , - "log": { - "type": - "string", - "description": "Type: `file`, default: `$id.$key.log.log`. ", - "help_text": "Type: `file`, default: `$id.$key.log.log`. " - , - "default":"$id.$key.log.log" - } - - -} -}, - - - "nextflow input-output arguments" : { - "title": "Nextflow input-output arguments", - "type": "object", - "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", - "properties": { - - - "publish_dir": { - "type": - "string", - "description": "Type: `string`, required, example: `output/`. Path to an output directory", - "help_text": "Type: `string`, required, example: `output/`. Path to an output directory." - - } - - - , - "param_list": { - "type": - "string", - "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel", - "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. 
Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. 
No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.", - "hidden": true - - } - - -} -} -}, -"allOf": [ - - { - "$ref": "#/definitions/input" - }, - - { - "$ref": "#/definitions/output" - }, - - { - "$ref": "#/definitions/nextflow input-output arguments" - } -] -} diff --git a/target/nextflow/umitools_prepareforquant/prepare-for-rsem.py b/target/nextflow/umitools_prepareforquant/prepare-for-rsem.py deleted file mode 100755 index 59dd01a..0000000 --- a/target/nextflow/umitools_prepareforquant/prepare-for-rsem.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 - -""" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Credits -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This script is a clone of the "prepare-for-rsem.py" script written by -Ian Sudbury, Tom Smith and other contributors to the UMI-tools package: -https://github.com/CGATOxford/UMI-tools - -It has been included here to address problems encountered with -Salmon quant and RSEM as discussed in the issue below: -https://github.com/CGATOxford/UMI-tools/issues/465 - -When the "umi_tools prepare-for-rsem" command becomes available in an official -UMI-tools release this script will be replaced and deprecated. - -Commit: -https://github.com/CGATOxford/UMI-tools/blob/bf8608d6a172c5ca0dcf33c126b4e23429177a72/umi_tools/prepare-for-rsem.py - -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -prepare_for_rsem - make the output from dedup or group compatible with RSEM -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The SAM format specification states that the mnext and mpos fields should point -to the primary alignment of a read's mate. However, not all aligners adhere to -this standard. In addition, the RSEM software requires that the mate of a read1 -appears directly after it in its input BAM. 
This requires that there is exactly -one read1 alignment for every read2 and vice versa. - -In general (except in a few edge cases) UMI tools outputs only the read2 to that -corresponds to the read specified in the mnext and mpos positions of a selected -read1, and only outputs this read once, even if multiple read1s point to it. -This makes UMI-tools outputs incompatible with RSEM. This script takes the output -from dedup or groups and ensures that each read1 has exactly one read2 (and vice -versa), that read2 always appears directly after read1,and that pairs point to -each other (note this is technically not valid SAM format). Copy any specified -tags from read1 to read2 if they are present (by default, UG and BX, the unique -group and correct UMI tags added by _group_) - -Input must to name sorted. - - -https://raw.githubusercontent.com/CGATOxford/UMI-tools/master/LICENSE - -""" - -from umi_tools import Utilities as U -from collections import defaultdict, Counter -import pysam -import sys - -usage = """ -prepare_for_rsem - make output from dedup or group compatible with RSEM - -Usage: umi_tools prepare_for_rsem [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM] - - note: If --stdout is omited, standard out is output. 
To - generate a valid BAM file on standard out, please - redirect log with --log=LOGFILE or --log2stderr """ - - -def chunk_bam(bamfile): - """Take in a iterator of pysam.AlignmentSegment entries and yield - lists of reads that all share the same name""" - - last_query_name = None - output_buffer = list() - - for read in bamfile: - if last_query_name is not None and last_query_name != read.query_name: - yield (output_buffer) - output_buffer = list() - - last_query_name = read.query_name - output_buffer.append(read) - - yield (output_buffer) - - -def copy_tags(tags, read1, read2): - """Given a list of tags, copies the values of these tags from read1 - to read2, if the tag is set""" - - for tag in tags: - try: - read1_tag = read1.get_tag(tag, with_value_type=True) - read2.set_tag(tag, value=read1_tag[0], value_type=read1_tag[1]) - except KeyError: - pass - - return read2 - - -def pick_mate(read, template_dict, mate_key): - """Find the mate of read in the template dict using key. It will retrieve - all reads at that key, and then scan to pick the one that refers to _read_ - as it's mate. If there is no such read, it picks a first one it comes to""" - - mate = None - - # get a list of secondary reads at the correct alignment position - potential_mates = template_dict[not read.is_read1][mate_key] - - # search through one at a time to find a read that points to the current read - # as its mate. - for candidate_mate in potential_mates: - if ( - candidate_mate.next_reference_name == read.reference_name - and candidate_mate.next_reference_start == read.pos - ): - mate = candidate_mate - - # if no such read is found, then pick any old secondary alignment at that position - # note: this happens when UMI-tools outputs the wrong read as something's pair. 
- if mate is None and len(potential_mates) > 0: - mate = potential_mates[0] - - return mate - - -def main(argv=None): - if argv is None: - argv = sys.argv - - # setup command line parser - parser = U.OptionParser(version="%prog version: $Id$", usage=usage, description=globals()["__doc__"]) - group = U.OptionGroup(parser, "RSEM preparation specific options") - - group.add_option( - "--tags", - dest="tags", - type="string", - default="UG,BX", - help="Comma-separated list of tags to transfer from read1 to read2", - ) - group.add_option( - "--sam", dest="sam", action="store_true", default=False, help="input and output SAM rather than BAM" - ) - - parser.add_option_group(group) - - # add common options (-h/--help, ...) and parse command line - (options, args) = U.Start( - parser, argv=argv, add_group_dedup_options=False, add_umi_grouping_options=False, add_sam_options=False - ) - - skipped_stats = Counter() - - if options.stdin != sys.stdin: - in_name = options.stdin.name - options.stdin.close() - else: - in_name = "-" - - if options.sam: - mode = "" - else: - mode = "b" - - inbam = pysam.AlignmentFile(in_name, "r" + mode) - - if options.stdout != sys.stdout: - out_name = options.stdout.name - options.stdout.close() - else: - out_name = "-" - - outbam = pysam.AlignmentFile(out_name, "w" + mode, template=inbam) - - options.tags = options.tags.split(",") - - for template in chunk_bam(inbam): - assert len(set(r.query_name for r in template)) == 1 - current_template = {True: defaultdict(list), False: defaultdict(list)} - - for read in template: - key = (read.reference_name, read.pos, not read.is_secondary) - current_template[read.is_read1][key].append(read) - - output = set() - - for read in template: - mate = None - - # if this read is a non_primary alignment, we first want to check if it has a mate - # with the non-primary alignment flag set. 
- - mate_key_primary = True - mate_key_secondary = (read.next_reference_name, read.next_reference_start, False) - - # First look for a read that has the same primary/secondary status - # as read (i.e. secondary mate for secondary read, and primary mate - # for primary read) - mate_key = (read.next_reference_name, read.next_reference_start, read.is_secondary) - mate = pick_mate(read, current_template, mate_key) - - # If none was found then look for the opposite (primary mate of secondary - # read or seconadary mate of primary read) - if mate is None: - mate_key = (read.next_reference_name, read.next_reference_start, not read.is_secondary) - mate = pick_mate(read, current_template, mate_key) - - # If we still don't have a mate, then their can't be one? - if mate is None: - skipped_stats["no_mate"] += 1 - U.warn( - "Alignment {} has no mate -- skipped".format( - "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) - ) - ) - continue - - # because we might want to make changes to the read, but not have those changes reflected - # if we need the read again,we copy the read. This is only way I can find to do this. - read = pysam.AlignedSegment().from_dict(read.to_dict(), read.header) - mate = pysam.AlignedSegment().from_dict(mate.to_dict(), read.header) - - # Make it so that if our read is secondary, the mate is also secondary. We don't make the - # mate primary if the read is primary because we would otherwise end up with mulitple - # primary alignments. - if read.is_secondary: - mate.is_secondary = True - - # In a situation where there is already one mate for each read, then we will come across - # each pair twice - once when we scan read1 and once when we scan read2. Thus we need - # to make sure we don't output something already output. 
- if read.is_read1: - mate = copy_tags(options.tags, read, mate) - output_key = str(read) + str(mate) - - if output_key not in output: - output.add(output_key) - outbam.write(read) - outbam.write(mate) - skipped_stats["pairs_output"] += 1 - - elif read.is_read2: - read = copy_tags(options.tags, mate, read) - output_key = str(mate) + str(read) - - if output_key not in output: - output.add(output_key) - outbam.write(mate) - outbam.write(read) - skipped_stats["pairs_output"] += 1 - - else: - skipped_stats["skipped_not_read_12"] += 1 - U.warn( - "Alignment {} is neither read1 nor read2 -- skipped".format( - "\t".join(map(str, [read.query_name, read.flag, read.reference_name, int(read.pos)])) - ) - ) - continue - - if not out_name == "-": - outbam.close() - - U.info( - "Total pairs output: {}, Pairs skipped - no mates: {}," - " Pairs skipped - not read1 or 2: {}".format( - skipped_stats["pairs_output"], skipped_stats["no_mate"], skipped_stats["skipped_not_read12"] - ) - ) - U.Stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml index 72ffeac..84080e1 100644 --- a/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml +++ b/target/nextflow/workflows/genome_alignment_and_quant/.config.vsh.yaml @@ -459,51 +459,57 @@ dependencies: - name: "star/star_align_reads" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_sort" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_stats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_flagstat" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: 
"samtools/samtools_idxstats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "umitools/umitools_dedup" +- name: "umi_tools/umi_tools_dedup" repository: - type: "local" -- name: "umitools_prepareforquant" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "umi_tools/umi_tools_prepareforrsem" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "rsem/rsem_calculate_expression" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -584,8 +590,8 @@ build_info: output: "target/nextflow/workflows/genome_alignment_and_quant" executable: "target/nextflow/workflows/genome_alignment_and_quant/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_align_reads" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -593,10 +599,10 @@ build_info: - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats" - - "target/nextflow/umitools/umitools_dedup" - - "target/nextflow/umitools_prepareforquant" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem" - 
"target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - - "target/nextflow/rsem/rsem_calculate_expression" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression" package_config: name: "rnaseq" version: "main" @@ -607,7 +613,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/genome_alignment_and_quant/main.nf b/target/nextflow/workflows/genome_alignment_and_quant/main.nf index f37112e..93868ab 100644 --- a/target/nextflow/workflows/genome_alignment_and_quant/main.nf +++ b/target/nextflow/workflows/genome_alignment_and_quant/main.nf @@ -3339,7 +3339,7 @@ meta = [ "name" : "star/star_align_reads", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3347,7 +3347,7 @@ meta = [ "name" : "samtools/samtools_sort", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3355,7 +3355,7 @@ meta = [ "name" : "samtools/samtools_index", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3363,7 +3363,7 @@ meta = [ "name" : "samtools/samtools_stats", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3371,7 +3371,7 @@ meta = [ "name" : "samtools/samtools_flagstat", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3379,34 +3379,40 @@ meta = [ "name" : "samtools/samtools_idxstats", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { - "name" : "umitools/umitools_dedup", + "name" : "umi_tools/umi_tools_dedup", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { - "name" : "umitools_prepareforquant", + "name" : "umi_tools/umi_tools_prepareforrsem", "repository" 
: { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "salmon/salmon_quant", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { "name" : "rsem/rsem_calculate_expression", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } } ], @@ -3414,7 +3420,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3510,8 +3516,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/genome_alignment_and_quant", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3528,7 +3534,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3560,10 +3566,10 @@ include { samtools_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/ma include { samtools_stats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_stats/main.nf" include { samtools_flagstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf" include { samtools_idxstats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf" -include { umitools_dedup } from "${meta.resources_dir}/../../../nextflow/umitools/umitools_dedup/main.nf" -include { umitools_prepareforquant } from "${meta.resources_dir}/../../../nextflow/umitools_prepareforquant/main.nf" +include { umi_tools_dedup } from 
"${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_dedup/main.nf" +include { umi_tools_prepareforrsem } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_prepareforrsem/main.nf" include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" -include { rsem_calculate_expression } from "${meta.resources_dir}/../../../nextflow/rsem/rsem_calculate_expression/main.nf" +include { rsem_calculate_expression } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/rsem/rsem_calculate_expression/main.nf" // inner workflow // user-provided Nextflow code @@ -3653,167 +3659,169 @@ workflow run_wf { key: "genome_idxstats" ) - // - // Remove duplicate reads from BAM file based on UMIs - // - - // Deduplicate genome BAM file - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "get_output_stats": "umi_dedup_stats" - ], - toState: [ "genome_bam_sorted": "output_bam" ], - key: "genome_deduped" - ) - | samtools_index.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_deduped" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_deduped_flagstat" - ) - | 
samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta", - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_deduped_idxstats" - ) + // + // Remove duplicate reads from BAM file based on UMIs + // + + // Deduplicate genome BAM file + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? state.id : + [ paired: state.paired, + input: state.genome_bam, + bai: state.genome_bam_index, + output_stats: output_stats] + }, + toState: [ "genome_bam_sorted": "output" ], + key: "genome_deduped" + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_deduped" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta", + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_deduped_idxstats" + ) // Deduplicate transcriptome BAM file - | samtools_sort.run ( - runIf: { id, state -> state.with_umi && state.aligner == 
'star_salmon' }, - fromState: [ "input": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "csi": "bam_csi_index" - ], - toState: [ "transcriptome_bam_index": "output" ], - key: "transcriptome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index", - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_idxstats" - ) - - | umitools_dedup.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "paired": "paired", - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index", - "get_output_stats": "umi_dedup_stats", - ], - toState: [ "transcriptome_bam_deduped": "output_bam" ], - key: "transcriptome_deduped" - ) - | samtools_sort.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ "input": "transcriptome_bam_deduped" ], - toState: [ "transcriptome_bam": "output" ], - key: "transcriptome_deduped_sorted" - ) - | samtools_index.run ( + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" 
], + toState: [ "transcriptome_bam": "output" ], + key: "transcriptome_sorted" + ) + | samtools_index.run ( runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, fromState: [ "input": "transcriptome_bam", "csi": "bam_csi_index" ], toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index", + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_idxstats" + ) + + | umi_tools_dedup.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: { id, state -> + def output_stats = state.umi_dedup_stats ? 
state.id : + [ paired: state.paired, + input: state.transcriptome_bam, + bai: state.transcriptome_bam_index, + output_stats: output_stats] + }, + toState: [ "transcriptome_bam_deduped": "output" ], + key: "transcriptome_deduped" + ) + | samtools_sort.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam_deduped" ], + toState: [ "transcriptome_bam": "output" ], key: "transcriptome_deduped_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "input": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_stats": "output" ], - key: "transcriptome_deduped_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_flagstat": "output" ], - key: "transcriptome_deduped_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, - fromState: [ - "bam": "transcriptome_bam", - "bai": "transcriptome_bam_index" - ], - toState: [ "transcriptome_bam_idxstats": "output" ], - key: "transcriptome_deduped_idxstats" - ) + ) + | samtools_index.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "csi": "bam_csi_index" + ], + toState: [ "transcriptome_bam_index": "output" ], + key: "transcriptome_deduped_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "input": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_stats": "output" ], + key: "transcriptome_deduped_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + 
fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_flagstat": "output" ], + key: "transcriptome_deduped_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' }, + fromState: [ + "bam": "transcriptome_bam", + "bai": "transcriptome_bam_index" + ], + toState: [ "transcriptome_bam_idxstats": "output" ], + key: "transcriptome_deduped_idxstats" + ) - // Fix paired-end reads in name sorted BAM file - | umitools_prepareforquant.run ( - runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, - fromState: [ "bam": "transcriptome_bam" ], - toState: [ "transcriptome_bam": "output" ] - ) + // Fix paired-end reads in name sorted BAM file + | umi_tools_prepareforrsem.run ( + runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' }, + fromState: [ "input": "transcriptome_bam" ], + toState: [ "transcriptome_bam": "output" ] + ) // Infer lib-type for salmon quant | map { id, state -> @@ -3850,78 +3858,91 @@ workflow run_wf { ] ) - | map { id, state -> - def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state - [ id, mod_state ] - } - - | rsem_calculate_expression.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "id": "id", - "strandedness": "strandedness", - "paired": "paired", - "input": "input", - "index": "rsem_index", - "extra_args": "extra_rsem_calculate_expression_args" - ], - toState: [ - "rsem_counts_gene": "counts_gene", - "rsem_counts_transcripts": "counts_transcripts", - "rsem_multiqc": "stat", - "star_multiqc": "logs", - "bam_star_rsem": "bam_star", - "bam_genome_rsem": "bam_genome", - "bam_transcript_rsem": "bam_transcript" - ] - ) - - // RSEM_Star BAM - | samtools_sort.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: ["input": "bam_star_rsem"], - toState: ["genome_bam_sorted": "output"], - key: "genome_sorted" - ) - | samtools_index.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "csi": "bam_csi_index" - ], - toState: [ "genome_bam_index": "output" ], - key: "genome_sorted" - ) - | samtools_stats.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "input": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_stats": "output" ], - key: "genome_stats" - ) - | samtools_flagstat.run ( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_flagstat": "output" ], - key: "genome_flagstat" - ) - | samtools_idxstats.run( - runIf: { id, state -> state.aligner == 'star_rsem' }, - fromState: [ - "bam": "genome_bam_sorted", - "bai": "genome_bam_index", - "fasta": "fasta" - ], - toState: [ "genome_bam_idxstats": "output" ], - key: "genome_idxstats" - ) + | map { id, state -> + def mod_state = (state.aligner == 'star_salmon') ? 
state + [salmon_multiqc: state.quant_out_dir] : state + [ id, mod_state ] + } + + | rsem_calculate_expression.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "id": "id", + "strandedness": "strandedness", + "paired": "paired", + "input": "input", + "index": "rsem_index", + "counts_gene": "rsem_counts_gene", + "counts_transcripts": "rsem_counts_transcripts", + "stat": "rsem_multiqc", + "logs": "star_multiqc", + "bam_star": "bam_star_rsem", + "bam_genome": "bam_genome_rsem", + "bam_transcript": "bam_transcript_rsem" + ], + args: [ + star: true, + star_output_genome_bam: true, + star_gzipped_read_file: true, + estimate_rspd: true, + seed: 1 + ], + toState: [ + "rsem_counts_gene": "counts_gene", + "rsem_counts_transcripts": "counts_transcripts", + "rsem_multiqc": "stat", + "star_multiqc": "logs", + "bam_star_rsem": "bam_star", + "bam_genome_rsem": "bam_genome", + "bam_transcript_rsem": "bam_transcript" + ] + ) + + // RSEM_Star BAM + | samtools_sort.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: ["input": "bam_star_rsem"], + toState: ["genome_bam_sorted": "output"], + key: "genome_sorted" + ) + | samtools_index.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "csi": "bam_csi_index" + ], + toState: [ "genome_bam_index": "output" ], + key: "genome_sorted" + ) + | samtools_stats.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "input": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_stats": "output" ], + key: "genome_stats" + ) + | samtools_flagstat.run ( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: [ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_flagstat": "output" ], + key: "genome_flagstat" + ) + | samtools_idxstats.run( + runIf: { id, state -> state.aligner == 'star_rsem' }, + fromState: 
[ + "bam": "genome_bam_sorted", + "bai": "genome_bam_index", + "fasta": "fasta" + ], + toState: [ "genome_bam_idxstats": "output" ], + key: "genome_idxstats" + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml index 3428398..40c44cd 100644 --- a/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml +++ b/target/nextflow/workflows/merge_quant_results/.config.vsh.yaml @@ -197,7 +197,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -278,8 +278,8 @@ build_info: output: "target/nextflow/workflows/merge_quant_results" executable: "target/nextflow/workflows/merge_quant_results/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/tx2gene" - "target/nextflow/tximport" @@ -294,7 +294,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/merge_quant_results/main.nf b/target/nextflow/workflows/merge_quant_results/main.nf index f058526..e5f5c71 100644 --- a/target/nextflow/workflows/merge_quant_results/main.nf +++ b/target/nextflow/workflows/merge_quant_results/main.nf @@ -3053,7 +3053,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3149,8 +3149,8 @@ meta = [ "engine" : "native", "output" : 
"/workdir/root/repo/target/nextflow/workflows/merge_quant_results", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3167,7 +3167,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { diff --git a/target/nextflow/workflows/post_processing/.config.vsh.yaml b/target/nextflow/workflows/post_processing/.config.vsh.yaml index 3af316b..a7623bb 100644 --- a/target/nextflow/workflows/post_processing/.config.vsh.yaml +++ b/target/nextflow/workflows/post_processing/.config.vsh.yaml @@ -124,17 +124,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_bedtools_args" - description: "Extra arguments to pass to bedtools genomecov command in addition\ - \ to defaults defined by the pipeline." 
- info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "boolean" name: "--bam_csi_index" description: "Create a CSI index for BAM files instead of the traditional BAI\ @@ -368,34 +357,36 @@ dependencies: - name: "samtools/samtools_sort" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_stats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_flagstat" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "samtools/samtools_idxstats" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "stringtie" repository: type: "local" -- name: "bedtools_genomecov" +- name: "bedtools/bedtools_genomecov" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "ucsc/bedclip" repository: type: "local" @@ -405,7 +396,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -486,8 +477,8 @@ build_info: output: "target/nextflow/workflows/post_processing" executable: "target/nextflow/workflows/post_processing/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/picard_markduplicates" - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_sort" @@ -496,7 +487,7 @@ build_info: - "target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat" - 
"target/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats" - "target/nextflow/stringtie" - - "target/nextflow/bedtools_genomecov" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov" - "target/nextflow/ucsc/bedclip" - "target/nextflow/ucsc/bedgraphtobigwig" package_config: @@ -509,7 +500,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/post_processing/main.nf b/target/nextflow/workflows/post_processing/main.nf index a22daab..b91c759 100644 --- a/target/nextflow/workflows/post_processing/main.nf +++ b/target/nextflow/workflows/post_processing/main.nf @@ -2943,18 +2943,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "--extra_bedtools_args", - "description" : "Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.", - "default" : [ - "" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "boolean", "name" : "--bam_csi_index", @@ -3236,7 +3224,7 @@ meta = [ "name" : "samtools/samtools_sort", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3244,7 +3232,7 @@ meta = [ "name" : "samtools/samtools_index", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3252,7 +3240,7 @@ meta = [ "name" : "samtools/samtools_stats", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3260,7 +3248,7 @@ meta = [ "name" : "samtools/samtools_flagstat", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3268,7 +3256,7 @@ meta = [ "name" : "samtools/samtools_idxstats", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : 
"main" } }, @@ -3279,9 +3267,11 @@ meta = [ } }, { - "name" : "bedtools_genomecov", + "name" : "bedtools/bedtools_genomecov", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { @@ -3301,7 +3291,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3397,8 +3387,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/post_processing", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3415,7 +3405,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3448,7 +3438,7 @@ include { samtools_stats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/ma include { samtools_flagstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_flagstat/main.nf" include { samtools_idxstats } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/samtools/samtools_idxstats/main.nf" include { stringtie } from "${meta.resources_dir}/../../../nextflow/stringtie/main.nf" -include { bedtools_genomecov } from "${meta.resources_dir}/../../../nextflow/bedtools_genomecov/main.nf" +include { bedtools_genomecov } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/bedtools/bedtools_genomecov/main.nf" include { bedclip } from "${meta.resources_dir}/../../../nextflow/ucsc/bedclip/main.nf" include { bedgraphtobigwig } from "${meta.resources_dir}/../../../nextflow/ucsc/bedgraphtobigwig/main.nf" @@ -3537,18 +3527,35 @@ workflow run_wf { // Genome-wide coverage with BEDTools 
- | bedtools_genomecov.run ( - runIf: { id, state -> !state.skip_bigwig }, - fromState: [ - "strandedness": "strandedness", - "bam": "processed_genome_bam", - "extra_bedtools_args": "extra_bedtools_args" - ], - toState: [ - "bedgraph_forward": "bedgraph_forward", - "bedgraph_reverse": "bedgraph_reverse" - ] - ) + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "+" + ], + toState: [ "bedgraph_forward": "output" ], + key: "bedtools_genomecov_forward" + ) + + | bedtools_genomecov.run ( + runIf: { id, state -> !state.skip_bigwig }, + fromState: [ + "input_bam": "processed_genome_bam", + ], + args: [ + split: true, + du: true, + bed_graph: true, + strand: "-" + ], + toState: [ "bedgraph_reverse": "output" ], + key: "bedtools_genomecov_reverse" + ) | bedclip.run ( runIf: { id, state -> !state.skip_bigwig }, diff --git a/target/nextflow/workflows/post_processing/nextflow_schema.json b/target/nextflow/workflows/post_processing/nextflow_schema.json index 1b74211..e9978f5 100644 --- a/target/nextflow/workflows/post_processing/nextflow_schema.json +++ b/target/nextflow/workflows/post_processing/nextflow_schema.json @@ -136,17 +136,6 @@ } - , - "extra_bedtools_args": { - "type": - "string", - "description": "Type: `string`, default: ``. Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: ``. Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline." 
- , - "default":"" - } - - , "bam_csi_index": { "type": diff --git a/target/nextflow/workflows/pre_processing/.config.vsh.yaml b/target/nextflow/workflows/pre_processing/.config.vsh.yaml index d182b2f..f6888d8 100644 --- a/target/nextflow/workflows/pre_processing/.config.vsh.yaml +++ b/target/nextflow/workflows/pre_processing/.config.vsh.yaml @@ -57,19 +57,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "file" - name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" - info: null - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "file" name: "--ribo_database_manifest" description: "Text file containing paths to fasta files (one per line) that will\ @@ -267,15 +254,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_trimgalore_args" - description: "Extra arguments to pass to Trim Galore! command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_trimmed_reads" description: "Minimum number of trimmed reads below which samples are removed\ @@ -308,19 +286,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" -- name: "Alignment options" - arguments: - - type: "string" - name: "--extra_salmon_quant_args" - description: "Extra arguments to pass to salmon quant command in addition to defaults\ - \ defined by the pipeline." 
- info: null - default: - - "" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Read filtering options" arguments: - type: "boolean_true" @@ -333,19 +298,6 @@ argument_groups: description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA." info: null direction: "input" -- name: "Other options" - arguments: - - type: "string" - name: "--extra_fq_subsample_args" - description: "Extra arguments to pass to fq subsample command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - "--record-count 1000000 --seed 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Output" arguments: - type: "file" @@ -353,7 +305,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "$id.read_1.fastq" + - "${id}_r1.fastq.gz" must_exist: false create_parent: true required: false @@ -365,7 +317,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "$id.read_2.fastq" + - "${id}_r2.fastq.gz" must_exist: false create_parent: true required: false @@ -377,7 +329,7 @@ argument_groups: description: "FastQC HTML report for read 1." info: null default: - - "$id.read_1.fastqc.html" + - "${id}_r1.fastqc.html" must_exist: false create_parent: true required: false @@ -389,7 +341,7 @@ argument_groups: description: "FastQC HTML report for read 2." info: null default: - - "$id.read_2.fastqc.html" + - "${id}_r2.fastqc.html" must_exist: false create_parent: true required: false @@ -401,7 +353,7 @@ argument_groups: description: "FastQC report archive for read 1." info: null default: - - "$id.read_1.fastqc.zip" + - "${id}_r1.fastqc.zip" must_exist: false create_parent: true required: false @@ -413,7 +365,7 @@ argument_groups: description: "FastQC report archive for read 2." 
info: null default: - - "$id.read_2.fastqc.zip" + - "${id}_r2.fastqc.zip" must_exist: false create_parent: true required: false @@ -424,7 +376,7 @@ argument_groups: name: "--trim_log_1" info: null default: - - "$id.read_1.trimming_report.txt" + - "${id}_r1.trimming_report.txt" must_exist: false create_parent: true required: false @@ -435,7 +387,7 @@ argument_groups: name: "--trim_log_2" info: null default: - - "$id.read_2.trimming_report.txt" + - "${id}_r2.trimming_report.txt" must_exist: false create_parent: true required: false @@ -446,7 +398,7 @@ argument_groups: name: "--trim_html_1" info: null default: - - "$id.read_1.trimmed_fastqc.html" + - "${id}_r1.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -457,7 +409,7 @@ argument_groups: name: "--trim_html_2" info: null default: - - "$id.read_2.trimmed_fastqc.html" + - "${id}_r2.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -468,7 +420,7 @@ argument_groups: name: "--trim_zip_1" info: null default: - - "$id.read_1.trimmed_fastqc.zip" + - "${id}_r1.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -479,7 +431,7 @@ argument_groups: name: "--trim_zip_2" info: null default: - - "$id.read_2.trimmed_fastqc.zip" + - "${id}_r2.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -558,41 +510,48 @@ requirements: dependencies: - name: "fastqc" repository: - type: "local" -- name: "umitools/umitools_extract" - repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "umi_tools/umi_tools_extract" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "trimgalore" repository: - type: "local" -- name: "bbmap_bbsplit" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "bbmap/bbmap_bbsplit" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "sortmerna" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: 
"fastp" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "fq_subsample" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -673,17 +632,16 @@ build_info: output: "target/nextflow/workflows/pre_processing" executable: "target/nextflow/workflows/pre_processing/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - - "target/nextflow/fastqc" - - "target/nextflow/umitools/umitools_extract" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc" - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract" - - "target/nextflow/trimgalore" - - "target/nextflow/bbmap_bbsplit" - - "target/nextflow/sortmerna" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna" - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastp" - - "target/nextflow/fq_subsample" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample" - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" package_config: name: "rnaseq" @@ -695,7 +653,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/pre_processing/main.nf b/target/nextflow/workflows/pre_processing/main.nf index 
26148bb..1b8ca78 100644 --- a/target/nextflow/workflows/pre_processing/main.nf +++ b/target/nextflow/workflows/pre_processing/main.nf @@ -2871,17 +2871,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "file", - "name" : "--bbsplit_fasta_list", - "description" : "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \\"--skip_bbsplit\\" must be explicitly set to \\"false\\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)", - "must_exist" : true, - "create_parent" : true, - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "file", "name" : "--ribo_database_manifest", @@ -3116,15 +3105,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "--extra_trimgalore_args", - "description" : "Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "integer", "name" : "--min_trimmed_reads", @@ -3163,23 +3143,6 @@ meta = [ } ] }, - { - "name" : "Alignment options", - "arguments" : [ - { - "type" : "string", - "name" : "--extra_salmon_quant_args", - "description" : "Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.", - "default" : [ - "" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - } - ] - }, { "name" : "Read filtering options", "arguments" : [ @@ -3197,23 +3160,6 @@ meta = [ } ] }, - { - "name" : "Other options", - "arguments" : [ - { - "type" : "string", - "name" : "--extra_fq_subsample_args", - "description" : "Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.", - "default" : [ - "--record-count 1000000 --seed 1" - ], - "required" : false, - 
"direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - } - ] - }, { "name" : "Output", "arguments" : [ @@ -3222,7 +3168,7 @@ meta = [ "name" : "--qc_output1", "description" : "Path to output directory", "default" : [ - "$id.read_1.fastq" + "${id}_r1.fastq.gz" ], "must_exist" : false, "create_parent" : true, @@ -3236,7 +3182,7 @@ meta = [ "name" : "--qc_output2", "description" : "Path to output directory", "default" : [ - "$id.read_2.fastq" + "${id}_r2.fastq.gz" ], "must_exist" : false, "create_parent" : true, @@ -3250,7 +3196,7 @@ meta = [ "name" : "--fastqc_html_1", "description" : "FastQC HTML report for read 1.", "default" : [ - "$id.read_1.fastqc.html" + "${id}_r1.fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3264,7 +3210,7 @@ meta = [ "name" : "--fastqc_html_2", "description" : "FastQC HTML report for read 2.", "default" : [ - "$id.read_2.fastqc.html" + "${id}_r2.fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3278,7 +3224,7 @@ meta = [ "name" : "--fastqc_zip_1", "description" : "FastQC report archive for read 1.", "default" : [ - "$id.read_1.fastqc.zip" + "${id}_r1.fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3292,7 +3238,7 @@ meta = [ "name" : "--fastqc_zip_2", "description" : "FastQC report archive for read 2.", "default" : [ - "$id.read_2.fastqc.zip" + "${id}_r2.fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3305,7 +3251,7 @@ meta = [ "type" : "file", "name" : "--trim_log_1", "default" : [ - "$id.read_1.trimming_report.txt" + "${id}_r1.trimming_report.txt" ], "must_exist" : false, "create_parent" : true, @@ -3318,7 +3264,7 @@ meta = [ "type" : "file", "name" : "--trim_log_2", "default" : [ - "$id.read_2.trimming_report.txt" + "${id}_r2.trimming_report.txt" ], "must_exist" : false, "create_parent" : true, @@ -3331,7 +3277,7 @@ meta = [ "type" : "file", "name" : "--trim_html_1", "default" : [ - "$id.read_1.trimmed_fastqc.html" + "${id}_r1.trimmed_fastqc.html" ], 
"must_exist" : false, "create_parent" : true, @@ -3344,7 +3290,7 @@ meta = [ "type" : "file", "name" : "--trim_html_2", "default" : [ - "$id.read_2.trimmed_fastqc.html" + "${id}_r2.trimmed_fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3357,7 +3303,7 @@ meta = [ "type" : "file", "name" : "--trim_zip_1", "default" : [ - "$id.read_1.trimmed_fastqc.zip" + "${id}_r1.trimmed_fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3370,7 +3316,7 @@ meta = [ "type" : "file", "name" : "--trim_zip_2", "default" : [ - "$id.read_2.trimmed_fastqc.zip" + "${id}_r2.trimmed_fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3468,60 +3414,64 @@ meta = [ { "name" : "fastqc", "repository" : { - "type" : "local" - } - }, - { - "name" : "umitools/umitools_extract", - "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "umi_tools/umi_tools_extract", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { "name" : "trimgalore", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { - "name" : "bbmap_bbsplit", + "name" : "bbmap/bbmap_bbsplit", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "sortmerna", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "fastp", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { "name" : "fq_subsample", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "salmon/salmon_quant", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } } @@ -3530,7 +3480,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3626,8 +3576,8 @@ meta = [ "engine" : 
"native", "output" : "/workdir/root/repo/target/nextflow/workflows/pre_processing", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3644,7 +3594,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3670,14 +3620,13 @@ meta = [ // resolve dependencies dependencies (if any) meta["root_dir"] = getRootDir() -include { fastqc } from "${meta.resources_dir}/../../../nextflow/fastqc/main.nf" -include { umitools_extract } from "${meta.resources_dir}/../../../nextflow/umitools/umitools_extract/main.nf" +include { fastqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf" include { umi_tools_extract } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract/main.nf" -include { trimgalore } from "${meta.resources_dir}/../../../nextflow/trimgalore/main.nf" -include { bbmap_bbsplit } from "${meta.resources_dir}/../../../nextflow/bbmap_bbsplit/main.nf" -include { sortmerna } from "${meta.resources_dir}/../../../nextflow/sortmerna/main.nf" +include { trimgalore } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/trimgalore/main.nf" +include { bbmap_bbsplit } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf" +include { sortmerna } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/sortmerna/main.nf" include { fastp } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/fastp/main.nf" -include { fq_subsample } from "${meta.resources_dir}/../../../nextflow/fq_subsample/main.nf" +include { fq_subsample } 
from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/fq_subsample/main.nf" include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" // inner workflow @@ -3696,48 +3645,58 @@ workflow run_wf { [ id, state + [paired: paired, input: input] ] } - // Perform QC on input fastq files | fastqc.run ( runIf: { id, state -> !state.skip_qc && !state.skip_fastqc }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "fastqc_html_1": "fastqc_html_1", - "fastqc_html_2": "fastqc_html_2", - "fastqc_zip_1": "fastqc_zip_1", - "fastqc_zip_2": "fastqc_zip_2" - ] + fromState: [ "input": "input" ], + toState: {id, output_state, state -> + def newKeys = [ + "fastqc_html_1":output_state["html"][0], + "fastqc_html_2": output_state["html"][1], + "fastqc_zip_1": output_state["zip"][0], + "fastqc_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"] ) // Extract UMIs from fastq files and discard read 1 or read 2 if required - | umitools_extract.run ( + | umi_tools_extract.run ( runIf: { id, state -> state.with_umi && !state.skip_umi_extract }, fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - def bc_pattern = state.paired ? [ state.umitools_bc_pattern, state.umitools_bc_pattern2 ] : [ state.umitools_bc_pattern ] - [ paired: state.paired, - input: input, - bc_pattern: bc_pattern, - umi_discard_read: state.umi_discard_read ] + def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2) + def output = "${id}.r1.fastq.gz" + def read2_out = state.paired ? 
"${id}.r2.fastq.gz" : state.remove(state.fastq_2) + [ input: state.fastq_1, + read2_in: state.fastq_2, + bc_pattern: state.umitools_bc_pattern, + bc_pattern2: bc_pattern2, + extract_method: state.umitools_extract_method, + umi_separator: state.umitools_umi_separator, + grouping_method: state.umitools_grouping_method, + output: output, + read2_out: read2_out ] }, toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2" + "fastq_1": "output", + "fastq_2": "read2_out" ] ) // Discard read if required | map { id, state -> def paired = state.paired + def fastq_1 = state.fastq_1 def fastq_2 = state.fastq_2 if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) { - fastq_2 = state.remove(state.fastq_2) + if (state.umi_discard_read == 1) { + fastq_1 = fastq_2 + } + fastq_2 = state.remove(state.fastq_2) paired = false } - [ id, state + [paired: paired, fastq_2: fastq_2] ] + [ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ] } // Trim reads using Trim galore! @@ -3747,8 +3706,11 @@ workflow run_wf { def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - min_trimmed_reads: state.min_trimmed_reads ] + min_trimmed_reads: state.min_trimmed_reads, + trimmed_r1: state.qc_output1, + trimmed_r2: state.qc_output2 ] }, + args: [gzip: true, fastqc: true], toState: [ "fastq_1": "trimmed_r1", "fastq_2": "trimmed_r2", @@ -3758,21 +3720,22 @@ workflow run_wf { "trim_zip_2": "trimmed_fastqc_zip_2", "trim_html_1": "trimmed_fastqc_html_1", "trim_html_2": "trimmed_fastqc_html_2" - ], - args: [gzip: true, fastqc: true] + ] ) // Trim reads using fastp | fastp.run( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, - fromState: [ - "in1": "fastq_1", - "in2": "fastq_2", - "merge": "fastp_save_merged", - "interleaved_in": "interleaved_reads", - "detect_adapter_for_pe": "fastp_pe_detect_adapter", - "adapter_fasta": "fastp_adapter_fasta" - ], + fromState: { id, state -> + def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState + [ in1: state.fastq_1, + in2: state.fastq_2, + merge: state.fastp_save_merged, + interleaved_in: state.interleaved_reads, + detect_adapter_for_pe: state.paired, + adapter_fasta: state.fastp_adapter_fasta ] + outputState + }, toState: [ "fastq_1": "out1", "fastq_2": "out2", @@ -3786,19 +3749,23 @@ workflow run_wf { ) // Perform FASTQC on reads trimmed using fastp - | fastqc.run( + | fastqc.run ( runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" }, fromState: { id, state -> def input = state.paired ? 
[ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ paired: state.paired, - input: input ] - }, - toState: [ - "trim_html_1": "fastqc_html_1", - "trim_html_2": "fastqc_html_2", - "trim_zip_1": "fastqc_zip_1", - "trim_zip_2": "fastqc_zip_2" - ], + [ input: input ] + }, + toState: {id, output_state, state -> + def newKeys = [ + "trim_html_1":output_state["html"][0], + "trim_html_2": output_state["html"][1], + "trim_zip_1": output_state["zip"][0], + "trim_zip_2": output_state["zip"][1] + ] + def new_state = state + newKeys + return new_state + }, + args: [html: "*.html", zip: "*.zip"], key: "fastqc_trimming" ) @@ -3809,7 +3776,7 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] [ paired: state.paired, input: input, - built_bbsplit_index: state.bbsplit_index ] + build: state.bbsplit_index ] }, args: ["only_build_index": false], toState: [ @@ -3825,27 +3792,44 @@ workflow run_wf { def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] def filePaths = state.ribo_database_manifest.readLines() def refs = filePaths.collect { it } - [ paired: state.paired, + def other = "${id}_non_rRNA_reads/" + [ paired_in: state.paired, input: input, - ribo_database_manifest: refs ] + ref: refs, + out2: state.paired, + other: other ] }, - toState: [ - "fastq_1": "fastq_1", - "fastq_2": "fastq_2", - "sortmerna_log": "sortmerna_log" - ] + args: [fastx: true, num_alignments: 1], + toState: { id, output_state, state -> + def newKeys = [ + "sortmerna_output": output_state["other"], + "sortmerna_log": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } ) + | map { id, state -> + if (state.remove_ribo_rna) { + def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"} + def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"} + [ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ] + } else { + [ id, state ] + } + } // Sub-sample FastQ 
files and pseudo-align with Salmon to auto-infer strandedness | fq_subsample.run ( runIf: { id, state -> state.strandedness == 'auto' }, - fromState: { id, state -> - def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ] - [ - input: input, - extra_args: state.extra_fq_subsample_args - ] + fromState: { id, state -> + def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)] + [input_1: state.fastq_1, input_2: state.fastq_2] + outputState }, + args: [ + record_count: 1000, + seed: 1 + ], toState: [ "subsampled_fastq_1": "output_1", "subsampled_fastq_2": "output_2" @@ -3871,6 +3855,7 @@ workflow run_wf { ) [ id, state + [lib_type: lib_type] ] } + | salmon_quant.run ( runIf: { id, state -> state.strandedness == 'auto' }, fromState: { id, state -> @@ -3888,17 +3873,17 @@ workflow run_wf { toState: [ "salmon_quant_output": "output" ] ) - | map { id, state -> - def mod_state = (!state.paired) ? - [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : - [] - [ id, state + mod_state ] - } + | map { id, state -> + def mod_state = (!state.paired) ? 
+ [trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] : + [] + [ id, state + mod_state ] + } - | map { id, state -> - def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } - [ id, mod_state ] - } + | map { id, state -> + def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } + [ id, mod_state ] + } | setState ( "fastqc_html_1": "fastqc_html_1", @@ -3914,9 +3899,6 @@ workflow run_wf { "trim_html_1": "trim_html_1", "trim_html_2": "trim_html_2", "sortmerna_log": "sortmerna_log", - "failed_trim": "failed_trim", - "failed_trim_unpaired1": "failed_trim_unpaired1", - "failed_trim_unpaired2": "failed_trim_unpaired2", "trim_json": "trim_json", "trim_html": "trim_html", "trim_merged_out": "trim_merged_out", diff --git a/target/nextflow/workflows/pre_processing/nextflow_schema.json b/target/nextflow/workflows/pre_processing/nextflow_schema.json index fa9429a..a87dd55 100644 --- a/target/nextflow/workflows/pre_processing/nextflow_schema.json +++ b/target/nextflow/workflows/pre_processing/nextflow_schema.json @@ -64,16 +64,6 @@ } - , - "bbsplit_fasta_list": { - "type": - "string", - "description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit", - "help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must be explicitly set to \"false\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)" - - } - - , "ribo_database_manifest": { "type": @@ -303,16 +293,6 @@ } - , - "extra_trimgalore_args": { - "type": - "string", - "description": "Type: `string`. Extra arguments to pass to Trim Galore! 
command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`. Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline." - - } - - , "min_trimmed_reads": { "type": @@ -346,27 +326,6 @@ } -} -}, - - - "alignment options" : { - "title": "Alignment options", - "type": "object", - "description": "No description", - "properties": { - - - "extra_salmon_quant_args": { - "type": - "string", - "description": "Type: `string`, default: ``. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: ``. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline." - , - "default":"" - } - - } }, @@ -399,27 +358,6 @@ } -} -}, - - - "other options" : { - "title": "Other options", - "type": "object", - "description": "No description", - "properties": { - - - "extra_fq_subsample_args": { - "type": - "string", - "description": "Type: `string`, default: `--record-count 1000000 --seed 1`. Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: `--record-count 1000000 --seed 1`. Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline." - , - "default":"--record-count 1000000 --seed 1" - } - - } }, @@ -434,10 +372,10 @@ "qc_output1": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qc_output1.fastq`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.qc_output1.fastq`. Path to output directory" + "description": "Type: `file`, default: `$id.$key.qc_output1.gz`. Path to output directory", + "help_text": "Type: `file`, default: `$id.$key.qc_output1.gz`. 
Path to output directory" , - "default":"$id.$key.qc_output1.fastq" + "default":"$id.$key.qc_output1.gz" } @@ -445,10 +383,10 @@ "qc_output2": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qc_output2.fastq`. Path to output directory", - "help_text": "Type: `file`, default: `$id.$key.qc_output2.fastq`. Path to output directory" + "description": "Type: `file`, default: `$id.$key.qc_output2.gz`. Path to output directory", + "help_text": "Type: `file`, default: `$id.$key.qc_output2.gz`. Path to output directory" , - "default":"$id.$key.qc_output2.fastq" + "default":"$id.$key.qc_output2.gz" } @@ -673,18 +611,10 @@ "$ref": "#/definitions/read trimming options" }, - { - "$ref": "#/definitions/alignment options" - }, - { "$ref": "#/definitions/read filtering options" }, - { - "$ref": "#/definitions/other options" - }, - { "$ref": "#/definitions/output" }, diff --git a/target/nextflow/workflows/prepare_genome/.config.vsh.yaml b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml index 0a74fa1..6aa1ec1 100644 --- a/target/nextflow/workflows/prepare_genome/.config.vsh.yaml +++ b/target/nextflow/workflows/prepare_genome/.config.vsh.yaml @@ -87,16 +87,14 @@ argument_groups: multiple_sep: ";" - type: "file" name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." 
info: null must_exist: true create_parent: true required: false direction: "input" - multiple: false + multiple: true multiple_sep: ";" - type: "file" name: "--star_index" @@ -126,15 +124,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "extra_rsem_prepare_reference_args" - description: "Extra arguments to pass to rsem-prepare-reference command in addition\ - \ to defaults defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "file" name: "--salmon_index" description: "Path to directory or tar.gz archive for pre-built Salmon index." @@ -382,7 +371,7 @@ dependencies: - name: "gffread" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "cat_additional_fasta" repository: @@ -399,7 +388,7 @@ dependencies: - name: "rsem/rsem_prepare_reference" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "getchromsizes" repository: @@ -412,23 +401,27 @@ dependencies: - name: "star/star_genome_generate" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "bbmap_bbsplit" +- name: "bbmap/bbmap_bbsplit" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "salmon/salmon_index" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "kallisto/kallisto_index" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -509,8 +502,8 @@ build_info: output: "target/nextflow/workflows/prepare_genome" executable: "target/nextflow/workflows/prepare_genome/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: 
"0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/gunzip" - "target/dependencies/vsh/vsh/biobox/main/nextflow/gffread" @@ -522,9 +515,9 @@ build_info: - "target/nextflow/getchromsizes" - "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar" - "target/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate" - - "target/nextflow/bbmap_bbsplit" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit" - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index" - - "target/nextflow/kallisto/kallisto_index" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index" package_config: name: "rnaseq" version: "main" @@ -535,7 +528,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/prepare_genome/main.nf b/target/nextflow/workflows/prepare_genome/main.nf index 658183c..d85f1f4 100644 --- a/target/nextflow/workflows/prepare_genome/main.nf +++ b/target/nextflow/workflows/prepare_genome/main.nf @@ -2900,12 +2900,12 @@ meta = [ { "type" : "file", "name" : "--bbsplit_fasta_list", - "description" : "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \\"--skip_bbsplit\\" must be explicitly set to \\"false\\". 
The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)", + "description" : "List of reference genomes (separated by \\";\\") to filter reads against with BBSplit.", "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "input", - "multiple" : false, + "multiple" : true, "multiple_sep" : ";" }, { @@ -2939,15 +2939,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "extra_rsem_prepare_reference_args", - "description" : "Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "file", "name" : "--salmon_index", @@ -3243,7 +3234,7 @@ meta = [ "name" : "gffread", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3275,7 +3266,7 @@ meta = [ "name" : "rsem/rsem_prepare_reference", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -3297,28 +3288,32 @@ meta = [ "name" : "star/star_genome_generate", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { - "name" : "bbmap_bbsplit", + "name" : "bbmap/bbmap_bbsplit", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "salmon/salmon_index", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { "name" : "kallisto/kallisto_index", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } } ], @@ -3326,7 +3321,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3422,8 +3417,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/prepare_genome", "viash_version" : "0.9.0", - 
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3440,7 +3435,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3476,9 +3471,9 @@ include { rsem_prepare_reference } from "${meta.root_dir}/dependencies/vsh/vsh/b include { getchromsizes } from "${meta.resources_dir}/../../../nextflow/getchromsizes/main.nf" include { untar } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/untar/main.nf" include { star_genome_generate } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/star/star_genome_generate/main.nf" -include { bbmap_bbsplit } from "${meta.resources_dir}/../../../nextflow/bbmap_bbsplit/main.nf" +include { bbmap_bbsplit } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/bbmap/bbmap_bbsplit/main.nf" include { salmon_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_index/main.nf" -include { kallisto_index } from "${meta.resources_dir}/../../../nextflow/kallisto/kallisto_index/main.nf" +include { kallisto_index } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_index/main.nf" // inner workflow // user-provided Nextflow code @@ -3622,43 +3617,45 @@ workflow run_wf { [ id, state + [transcript_fasta: transcript_fasta] ] } - // chromosome size and fai index - | getchromsizes.run ( - fromState: [ "fasta": "fasta" ], - toState: [ - "fai": "fai", - "sizes": "sizes" - ], - key: "chromsizes", - args: [ - fai: "genome_additional.fasta.fai", - sizes: "genome_additional.fasta.sizes" - ] - ) - - // untar bbsplit index, if available - | untar.run ( - runIf: 
{id, state -> state.bbsplit_index}, - fromState: [ "input": "bbsplit_index" ], - toState: [ "bbsplit_index": "output" ], - key: "untar_bbsplit_index", - args: [output: "BBSplit_index"] - ) - - // create bbsplit index, if not already availble - | bbmap_bbsplit.run ( - runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, - fromState: [ - "primary_ref": "fasta", - "bbsplit_fasta_list": "bbsplit_fasta_list" - ], - toState: [ "bbsplit_index": "bbsplit_index" ], - args: [ - only_build_index: true, - bbsplit_index: "BBSplit_index" - ], - key: "generate_bbsplit_index" - ) + // chromosome size and fai index + | getchromsizes.run ( + fromState: [ "fasta": "fasta" ], + toState: [ + "fai": "fai", + "sizes": "sizes" + ], + key: "chromsizes", + args: [ + fai: "genome_additional.fasta.fai", + sizes: "genome_additional.fasta.sizes" + ] + ) + + // untar bbsplit index, if available + | untar.run ( + runIf: {id, state -> state.bbsplit_index}, + fromState: [ "input": "bbsplit_index" ], + toState: [ "bbsplit_index": "output" ], + key: "untar_bbsplit_index", + args: [output: "BBSplit_index"] + ) + + | map {id, state -> + def ref = [state.fasta] + state.bbsplit_fasta_list + [id, state + [bbsplit_ref: ref] ] + } + + // create bbsplit index, if not already availble + | bbmap_bbsplit.run ( + runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index}, + fromState: ["ref": "bbsplit_ref"], + toState: [ "bbsplit_index": "index" ], + args: [ + only_build_index: true, + index: "BBSplit_index" + ], + key: "generate_bbsplit_index" + ) // Uncompress STAR index or generate from scratch if required | untar.run ( @@ -3735,16 +3732,16 @@ workflow run_wf { args: [output: "Kallisto_index"] ) - | kallisto_index.run( - runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, - fromState: [ - "transcriptome_fasta": "transcript_fasta", - "pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size" - ], - toState: [ "kallisto_index": "kallisto_index" ], - key: 
"generate_kallisto_index", - args: [kallisto_index: "Kallisto_index"] - ) + | kallisto_index.run( + runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index}, + fromState: [ + "input": "transcript_fasta", + "kmer_size": "pseudo_aligner_kmer_size" + ], + toState: [ "kallisto_index": "index" ], + key: "generate_kallisto_index", + args: [index: "Kallisto_index"] + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/nextflow/workflows/prepare_genome/nextflow_schema.json b/target/nextflow/workflows/prepare_genome/nextflow_schema.json index d617aeb..8350750 100644 --- a/target/nextflow/workflows/prepare_genome/nextflow_schema.json +++ b/target/nextflow/workflows/prepare_genome/nextflow_schema.json @@ -97,8 +97,8 @@ "bbsplit_fasta_list": { "type": "string", - "description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit", - "help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must be explicitly set to \"false\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)" + "description": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit." } @@ -133,16 +133,6 @@ } - , - "extra_rsem_prepare_reference_args": { - "type": - "string", - "description": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`. Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline." 
- - } - - , "salmon_index": { "type": diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml index bdebce6..d31b3a3 100644 --- a/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/.config.vsh.yaml @@ -116,7 +116,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length" description: "For single-end mode only, the estimated average fragment length\ \ to use for quantification with Kallisto." @@ -125,7 +125,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length_sd" description: "For single-end mode only, the estimated standard deviation of the\ \ fragment length for quantification with Kallisto." @@ -194,15 +194,17 @@ dependencies: - name: "salmon/salmon_quant" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "kallisto/kallisto_quant" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -283,11 +285,11 @@ build_info: output: "target/nextflow/workflows/pseudo_alignment_and_quant" executable: "target/nextflow/workflows/pseudo_alignment_and_quant/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant" - - 
"target/nextflow/kallisto/kallisto_quant" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant" package_config: name: "rnaseq" version: "main" @@ -298,7 +300,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf index 6ce9b0c..e4844d4 100644 --- a/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/main.nf @@ -2938,7 +2938,7 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "integer", + "type" : "double", "name" : "--kallisto_quant_fragment_length", "description" : "For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.", "required" : false, @@ -2947,7 +2947,7 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "integer", + "type" : "double", "name" : "--kallisto_quant_fragment_length_sd", "description" : "For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.", "required" : false, @@ -3032,14 +3032,16 @@ meta = [ "name" : "salmon/salmon_quant", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { "name" : "kallisto/kallisto_quant", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } } ], @@ -3047,7 +3049,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3143,8 +3145,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/pseudo_alignment_and_quant", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + 
"git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -3161,7 +3163,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -3188,7 +3190,7 @@ meta = [ // resolve dependencies dependencies (if any) meta["root_dir"] = getRootDir() include { salmon_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant/main.nf" -include { kallisto_quant } from "${meta.resources_dir}/../../../nextflow/kallisto/kallisto_quant/main.nf" +include { kallisto_quant } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/kallisto/kallisto_quant/main.nf" // inner workflow // user-provided Nextflow code @@ -3251,22 +3253,32 @@ workflow run_wf { [ id, mod_state ] } - | kallisto_quant.run ( - runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, - fromState: [ - "input": "input", - "paired": "paired", - "gtf": "gtf", - "index": "kallisto_index", - "fragment_length": "kallisto_quant_fragment_length", - "fragment_length_sd": "kallisto_quant_fragment_length_sd" - ], - toState: [ - "quant_out_dir": "output", - "kallisto_quant_results_file": "quant_results_file", - "pseudo_multiqc": "log" + | kallisto_quant.run ( + runIf: { id, state -> state.pseudo_aligner == 'kallisto'}, + fromState: { id, state -> + def fr_stranded = state.strandedness == 'forward' + def rf_stranded = state.strandedness == 'reverse' + [ + input: state.input, + index: state.kallisto_index, + fragment_length: state.kallisto_quant_fragment_length, + sd: state.kallisto_quant_fragment_length_sd, + single: !state.paired, + fr_stranded: fr_stranded, + rf_stranded: rf_stranded, ] - ) + }, + args: [log: "kallisto_quant.log"], + toState: { id, output_state, state -> + def newKeys = [ + "quant_out_dir": output_state["output_dir"], + "kallisto_quant_results_file":
output_state["output_dir"] + "/abundance.tsv", + "pseudo_multiqc": output_state["log"] + ] + def new_state = state + newKeys + return new_state + } + ) | map { id, state -> def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() } diff --git a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json index d476fd9..a616836 100644 --- a/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json +++ b/target/nextflow/workflows/pseudo_alignment_and_quant/nextflow_schema.json @@ -122,9 +122,9 @@ , "kallisto_quant_fragment_length": { "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", - "help_text": "Type: `integer`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." + "number", + "description": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." } @@ -132,9 +132,9 @@ , "kallisto_quant_fragment_length_sd": { "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", - "help_text": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto." + "number", + "description": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto."
} diff --git a/target/nextflow/workflows/quality_control/.config.vsh.yaml b/target/nextflow/workflows/quality_control/.config.vsh.yaml index cf1f269..1d4be06 100644 --- a/target/nextflow/workflows/quality_control/.config.vsh.yaml +++ b/target/nextflow/workflows/quality_control/.config.vsh.yaml @@ -281,15 +281,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_featurecounts_args" - description: "Extra arguments to pass to featureCounts command in addition to\ - \ defaults defined by the pipeline" - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--rseqc_modules" description: "Specify the RSeQC modules to run_wf" @@ -472,19 +463,6 @@ argument_groups: \ to determine tin. Only use this option if there are substantial intronic reads." info: null direction: "input" - - type: "string" - name: "--output_format" - description: "Format of the qualimap output report (PDF or HTML, default is HTML)" - info: null - default: - - "html" - required: false - choices: - - "html" - - "pdf" - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--pr_bases" description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\ @@ -1127,21 +1105,33 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_pdf" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." info: null - default: - - "$id.qualimap_output.pdf" - must_exist: false + example: + - "$id.rnaseq_qc_results.txt" + must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_dir" + name: "--qualimap_counts" + description: "Output file for computed counts." 
info: null - default: - - "$id.qualimap_output" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." + info: null + example: + - "$id.report.html" must_exist: true create_parent: true required: false @@ -1416,13 +1406,19 @@ requirements: dependencies: - name: "rseqc/rseqc_bamstat" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "rseqc/rseqc_inferexperiment" repository: - type: "local" -- name: "rseqc/rseqc_innerdistance" + type: "vsh" + repo: "biobox" + tag: "main" +- name: "rseqc/rseqc_inner_distance" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "rseqc/rseqc_junctionannotation" repository: type: "local" @@ -1441,16 +1437,18 @@ dependencies: - name: "dupradar" repository: type: "local" -- name: "qualimap" +- name: "qualimap/qualimap_rnaseq" repository: - type: "local" + type: "vsh" + repo: "biobox" + tag: "main" - name: "preseq_lcextrap" repository: type: "local" - name: "featurecounts" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - name: "multiqc_custom_biotype" repository: @@ -1464,9 +1462,9 @@ dependencies: - name: "multiqc" repository: type: "vsh" - repo: "vsh/biobox" + repo: "biobox" tag: "main" -- name: "rsem/rsem_merge_counts" +- name: "rsem_merge_counts" repository: type: "local" - name: "workflows/merge_quant_results" @@ -1475,7 +1473,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -1556,26 +1554,26 @@ build_info: output: "target/nextflow/workflows/quality_control" executable: "target/nextflow/workflows/quality_control/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: 
"https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - - "target/nextflow/rseqc/rseqc_bamstat" - - "target/nextflow/rseqc/rseqc_inferexperiment" - - "target/nextflow/rseqc/rseqc_innerdistance" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance" - "target/nextflow/rseqc/rseqc_junctionannotation" - "target/nextflow/rseqc/rseqc_junctionsaturation" - "target/nextflow/rseqc/rseqc_readdistribution" - "target/nextflow/rseqc/rseqc_readduplication" - "target/nextflow/rseqc/rseqc_tin" - "target/nextflow/dupradar" - - "target/nextflow/qualimap" + - "target/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq" - "target/nextflow/preseq_lcextrap" - "target/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts" - "target/nextflow/multiqc_custom_biotype" - "target/nextflow/deseq2_qc" - "target/nextflow/prepare_multiqc_input" - "target/dependencies/vsh/vsh/biobox/main/nextflow/multiqc" - - "target/nextflow/rsem/rsem_merge_counts" + - "target/nextflow/rsem_merge_counts" - "target/nextflow/workflows/merge_quant_results" package_config: name: "rnaseq" @@ -1587,7 +1585,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/quality_control/main.nf b/target/nextflow/workflows/quality_control/main.nf index 31739b4..e2f92a4 100644 --- a/target/nextflow/workflows/quality_control/main.nf +++ b/target/nextflow/workflows/quality_control/main.nf @@ -3124,15 +3124,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - 
"name" : "--extra_featurecounts_args", - "description" : "Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "string", "name" : "--rseqc_modules", @@ -3330,22 +3321,6 @@ meta = [ "description" : "Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads.", "direction" : "input" }, - { - "type" : "string", - "name" : "--output_format", - "description" : "Format of the qualimap output report (PDF or HTML, default is HTML)", - "default" : [ - "html" - ], - "required" : false, - "choices" : [ - "html", - "pdf" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "integer", "name" : "--pr_bases", @@ -4075,11 +4050,12 @@ meta = [ }, { "type" : "file", - "name" : "--qualimap_output_pdf", - "default" : [ - "$id.qualimap_output.pdf" + "name" : "--qualimap_qc_report", + "description" : "Text file containing the RNAseq QC results.", + "example" : [ + "$id.rnaseq_qc_results.txt" ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -4088,9 +4064,21 @@ meta = [ }, { "type" : "file", - "name" : "--qualimap_output_dir", - "default" : [ - "$id.qualimap_output" + "name" : "--qualimap_counts", + "description" : "Output file for computed counts.", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_report", + "description" : "Report output file. 
Supported formats are PDF or HTML.", + "example" : [ + "$id.report.html" ], "must_exist" : true, "create_parent" : true, @@ -4420,19 +4408,25 @@ meta = [ { "name" : "rseqc/rseqc_bamstat", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { "name" : "rseqc/rseqc_inferexperiment", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { - "name" : "rseqc/rseqc_innerdistance", + "name" : "rseqc/rseqc_inner_distance", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { @@ -4472,9 +4466,11 @@ meta = [ } }, { - "name" : "qualimap", + "name" : "qualimap/qualimap_rnaseq", "repository" : { - "type" : "local" + "type" : "vsh", + "repo" : "biobox", + "tag" : "main" } }, { @@ -4487,7 +4483,7 @@ meta = [ "name" : "featurecounts", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, @@ -4513,12 +4509,12 @@ meta = [ "name" : "multiqc", "repository" : { "type" : "vsh", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" } }, { - "name" : "rsem/rsem_merge_counts", + "name" : "rsem_merge_counts", "repository" : { "type" : "local" } @@ -4534,7 +4530,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -4630,8 +4626,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/quality_control", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -4648,7 +4644,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : 
"biobox", "tag" : "main" }, { @@ -4674,23 +4670,23 @@ meta = [ // resolve dependencies dependencies (if any) meta["root_dir"] = getRootDir() -include { rseqc_bamstat } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_bamstat/main.nf" -include { rseqc_inferexperiment } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_inferexperiment/main.nf" -include { rseqc_innerdistance } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_innerdistance/main.nf" +include { rseqc_bamstat } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_bamstat/main.nf" +include { rseqc_inferexperiment } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inferexperiment/main.nf" +include { rseqc_inner_distance } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/rseqc/rseqc_inner_distance/main.nf" include { rseqc_junctionannotation } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_junctionannotation/main.nf" include { rseqc_junctionsaturation } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_junctionsaturation/main.nf" include { rseqc_readdistribution } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_readdistribution/main.nf" include { rseqc_readduplication } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_readduplication/main.nf" include { rseqc_tin } from "${meta.resources_dir}/../../../nextflow/rseqc/rseqc_tin/main.nf" include { dupradar } from "${meta.resources_dir}/../../../nextflow/dupradar/main.nf" -include { qualimap } from "${meta.resources_dir}/../../../nextflow/qualimap/main.nf" +include { qualimap_rnaseq } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/qualimap/qualimap_rnaseq/main.nf" include { preseq_lcextrap } from "${meta.resources_dir}/../../../nextflow/preseq_lcextrap/main.nf" include { featurecounts } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/featurecounts/main.nf" include { multiqc_custom_biotype } from 
"${meta.resources_dir}/../../../nextflow/multiqc_custom_biotype/main.nf" include { deseq2_qc } from "${meta.resources_dir}/../../../nextflow/deseq2_qc/main.nf" include { prepare_multiqc_input } from "${meta.resources_dir}/../../../nextflow/prepare_multiqc_input/main.nf" include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/main/nextflow/multiqc/main.nf" -include { rsem_merge_counts } from "${meta.resources_dir}/../../../nextflow/rsem/rsem_merge_counts/main.nf" +include { rsem_merge_counts } from "${meta.resources_dir}/../../../nextflow/rsem_merge_counts/main.nf" include { merge_quant_results } from "${meta.resources_dir}/../../../nextflow/workflows/merge_quant_results/main.nf" // inner workflow @@ -4736,145 +4732,145 @@ workflow run_wf { ] ) - | multiqc_custom_biotype.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, - fromState: [ - "id": "id", - "biocounts": "featurecounts", - "biotypes_header": "biotypes_header" - ], - toState: [ - "featurecounts_multiqc": "featurecounts_multiqc", - "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" - ] - ) - - | preseq_lcextrap.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, - fromState: [ - "paired": "paired", - "input": "genome_bam", - "extra_preseq_args": "extra_preseq_args" - ], - toState: [ "preseq_output": "output" ] - ) - - | rseqc_bamstat.run ( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "map_qual": "map_qual" - ], - toState: [ "bamstat_output": "output" ] - ) - | rseqc_inferexperiment.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual" - ], - toState: 
[ "strandedness_output": "output" ] - ) - // Get predicted strandedness from the RSeQC infer_experiment.py output - | map { id, state -> - def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) - def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true - [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] - } - | rseqc_innerdistance.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, - key: "inner_distance", - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sample_size": "sample_size", - "map_qual": "map_qual", - "lower_bound_size": "lower_bound_size", - "upper_bound_size": "upper_bound_size", - "step_size": "step_size" - ], - toState: [ - "inner_dist_output_stats": "output_stats", - "inner_dist_output_dist": "output_dist", - "inner_dist_output_freq": "output_freq", - "inner_dist_output_plot": "output_plot", - "inner_dist_output_plot_r": "output_plot_r" - ] - ) - | rseqc_junctionannotation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "map_qual": "map_qual", - "min_intron": "min_intron" - ], - toState: [ - "junction_annotation_output_log": "output_log", - "junction_annotation_output_plot_r": "output_plot_r", - "junction_annotation_output_junction_bed": "output_junction_bed", - "junction_annotation_output_junction_interact": "output_junction_interact", - "junction_annotation_output_junction_sheet": "output_junction_sheet", - "junction_annotation_output_splice_events_plot": "output_splice_events_plot", - "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" - ] - ) - | rseqc_junctionsaturation.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && 
"junction_saturation" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", - "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", - "sampling_percentile_step": "sampling_percentile_step", - "min_intron": "min_intron", - "min_splice_read": "min_splice_read", - "map_qual": "map_qual" - ], - toState: [ - "junction_saturation_output_plot_r": "output_plot_r", - "junction_saturation_output_plot": "output_plot" - ] - ) - | rseqc_readdistribution.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "refgene": "gene_bed", - ], - toState: [ "read_distribution_output": "output" ] - ) - | rseqc_readduplication.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "read_count_upper_limit": "read_count_upper_limit", - "map_qual": "map_qual" - ], - toState: [ - "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", - "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", - "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", - "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" - ] - ) - | rseqc_tin.run( - runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, - fromState: [ - "bam_input": "genome_bam", - "bai_input": "genome_bam_index", - "refgene": "gene_bed", - "minimum_coverage": "minimum_coverage", - "sample_size": "tin_sample_size", - "subtract_background": "subtract_background" - ], - toState: [ - "tin_output_summary": "output_tin_summary", - "tin_output_metrics": "output_tin" - ] - ) + | 
multiqc_custom_biotype.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align }, + fromState: [ + "id": "id", + "biocounts": "featurecounts", + "biotypes_header": "biotypes_header" + ], + toState: [ + "featurecounts_multiqc": "featurecounts_multiqc", + "featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc" + ] + ) + + | preseq_lcextrap.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align }, + fromState: [ + "paired": "paired", + "input": "genome_bam", + "extra_preseq_args": "extra_preseq_args" + ], + toState: [ "preseq_output": "output" ] + ) + + | rseqc_bamstat.run ( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "mapq": "map_qual" + ], + toState: [ "bamstat_output": "output" ] + ) + | rseqc_inferexperiment.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual" + ], + toState: [ "strandedness_output": "output" ] + ) + // Get predicted strandedness from the RSeQC infer_experiment.py output + | map { id, state -> + def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30) + def passed_strand_check = (state.strandedness != inferred_strand[0]) ? 
false : true + [ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ] + } + | rseqc_inner_distance.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align }, + key: "inner_distance", + fromState: [ + "input_file": "genome_bam", + "refgene": "gene_bed", + "sample_size": "sample_size", + "mapq": "map_qual", + "lower_bound": "lower_bound_size", + "upper_bound": "upper_bound_size", + "step": "step_size" + ], + toState: [ + "inner_dist_output_stats": "output_stats", + "inner_dist_output_dist": "output_dist", + "inner_dist_output_freq": "output_freq", + "inner_dist_output_plot": "output_plot", + "inner_dist_output_plot_r": "output_plot_r" + ] + ) + | rseqc_junctionannotation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "map_qual": "map_qual", + "min_intron": "min_intron" + ], + toState: [ + "junction_annotation_output_log": "output_log", + "junction_annotation_output_plot_r": "output_plot_r", + "junction_annotation_output_junction_bed": "output_junction_bed", + "junction_annotation_output_junction_interact": "output_junction_interact", + "junction_annotation_output_junction_sheet": "output_junction_sheet", + "junction_annotation_output_splice_events_plot": "output_splice_events_plot", + "junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot" + ] + ) + | rseqc_junctionsaturation.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + "sampling_percentile_lower_bound": "sampling_percentile_lower_bound", + "sampling_percentile_upper_bound": "sampling_percentile_upper_bound", + "sampling_percentile_step": 
"sampling_percentile_step", + "min_intron": "min_intron", + "min_splice_read": "min_splice_read", + "map_qual": "map_qual" + ], + toState: [ + "junction_saturation_output_plot_r": "output_plot_r", + "junction_saturation_output_plot": "output_plot" + ] + ) + | rseqc_readdistribution.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "refgene": "gene_bed", + ], + toState: [ "read_distribution_output": "output" ] + ) + | rseqc_readduplication.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "input": "genome_bam", + "read_count_upper_limit": "read_count_upper_limit", + "map_qual": "map_qual" + ], + toState: [ + "read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r", + "read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot", + "read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping", + "read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence" + ] + ) + | rseqc_tin.run( + runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align }, + fromState: [ + "bam_input": "genome_bam", + "bai_input": "genome_bam_index", + "refgene": "gene_bed", + "minimum_coverage": "minimum_coverage", + "sample_size": "tin_sample_size", + "subtract_background": "subtract_background" + ], + toState: [ + "tin_output_summary": "output_tin_summary", + "tin_output_metrics": "output_tin" + ] + ) | dupradar.run( runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align }, @@ -4896,23 +4892,25 @@ workflow run_wf { ] ) - | qualimap.run( - runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align }, - fromState: [ - "input": "genome_bam", - "gtf": "gtf", - "pr_bases": 
"pr_bases", - "tr_bias": "tr_bias", - "algorithm": "algorithm", - "sequencing_protocol": "sequencing_protocol", - "sorted": "sorted", - "java_memory_size": "java_memory_size", - ], - toState: [ - "qualimap_output_pdf": "output_pdf", - "qualimap_output_dir": "output_dir" - ] - ) + // TODO: Add outdir as an output argument to the qualimap module on biobox. + // Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost. + | qualimap_rnaseq.run( + fromState: [ + "bam": "genome_bam", + "gtf": "gtf", + "num_pr_bases": "pr_bases", + "num_tr_bias": "tr_bias", + "algorithm": "algorithm", + "sequencing_protocol": "sequencing_protocol", + "sorted": "sorted", + "java_memory_size": "java_memory_size", + ], + toState: [ + "qualimap_report": "report", + "qualimap_qc_report": "qc_report", + "qualimap_counts": "counts" + ] + ) merged_ch = qc_ch | toSortedList @@ -5035,10 +5033,10 @@ workflow run_wf { (state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ? state.preseq_output : null } - def qualimap_output_dir = list.collect { id, state -> - (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? - state.qualimap_output_dir : - null } + // def qualimap_output_dir = list.collect { id, state -> + // (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ? + // state.qualimap_output_dir : + // null } def dupradar_output_dup_intercept_mqc = list.collect { id, state -> (state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ? 
state.dupradar_output_dup_intercept_mqc : @@ -5123,7 +5121,7 @@ workflow run_wf { featurecounts_multiqc: featurecounts_multiqc, featurecounts_rrna_multiqc: featurecounts_rrna_multiqc, preseq_output: preseq_output, - qualimap_output_dir: qualimap_output_dir, + // qualimap_output_dir: qualimap_output_dir, dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc, dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc, bamstat_output: bamstat_output, @@ -5302,7 +5300,7 @@ workflow run_wf { "pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo", "pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo", "preseq_multiqc": "preseq_output", - "qualimap_multiqc": "qualimap_output_dir", + // "qualimap_multiqc": "qualimap_output_dir", "dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc", "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "bamstat_multiqc": "bamstat_output", @@ -5402,8 +5400,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", diff --git a/target/nextflow/workflows/quality_control/nextflow_schema.json b/target/nextflow/workflows/quality_control/nextflow_schema.json index 63cbe3c..833fc51 100644 --- a/target/nextflow/workflows/quality_control/nextflow_schema.json +++ b/target/nextflow/workflows/quality_control/nextflow_schema.json @@ -359,16 +359,6 @@ } - , - "extra_featurecounts_args": { - 
"type": - "string", - "description": "Type: `string`. Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`. Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline" - - } - - , "rseqc_modules": { "type": @@ -534,19 +524,6 @@ } - , - "output_format": { - "type": - "string", - "description": "Type: `string`, default: `html`, choices: ``html`, `pdf``. Format of the qualimap output report (PDF or HTML, default is HTML)", - "help_text": "Type: `string`, default: `html`, choices: ``html`, `pdf``. Format of the qualimap output report (PDF or HTML, default is HTML)", - "enum": ["html", "pdf"] - - , - "default":"html" - } - - , "pr_bases": { "type": @@ -1197,24 +1174,35 @@ , - "qualimap_output_pdf": { + "qualimap_qc_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_output_pdf.pdf`. ", - "help_text": "Type: `file`, default: `$id.$key.qualimap_output_pdf.pdf`. " + "description": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`, example: `$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`, example: `$id.rnaseq_qc_results.txt`. Text file containing the RNAseq QC results." , - "default":"$id.$key.qualimap_output_pdf.pdf" + "default":"$id.$key.qualimap_qc_report.txt" } , - "qualimap_output_dir": { + "qualimap_counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_output_dir.qualimap_output`. ", - "help_text": "Type: `file`, default: `$id.$key.qualimap_output_dir.qualimap_output`. " + "description": "Type: `file`, default: `$id.$key.qualimap_counts.qualimap_counts`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.qualimap_counts.qualimap_counts`. Output file for computed counts." 
, - "default":"$id.$key.qualimap_output_dir.qualimap_output" + "default":"$id.$key.qualimap_counts.qualimap_counts" + } + + + , + "qualimap_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.qualimap_report.html`, example: `$id.report.html`. Report output file", + "help_text": "Type: `file`, default: `$id.$key.qualimap_report.html`, example: `$id.report.html`. Report output file. Supported formats are PDF or HTML." + , + "default":"$id.$key.qualimap_report.html" } diff --git a/target/nextflow/workflows/rnaseq/.config.vsh.yaml b/target/nextflow/workflows/rnaseq/.config.vsh.yaml index 90217c0..03dcdbe 100644 --- a/target/nextflow/workflows/rnaseq/.config.vsh.yaml +++ b/target/nextflow/workflows/rnaseq/.config.vsh.yaml @@ -237,24 +237,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_trimgalore_args" - description: "Extra arguments to pass to Trim Galore! command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "string" - name: "--extra_fastp_args" - description: "Extra arguments to pass to fastp command in addition to defaults\ - \ defined by the pipeline." - info: null - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_trimmed_reads" description: "Minimum number of trimmed reads below which samples are removed\ @@ -271,16 +253,14 @@ argument_groups: arguments: - type: "file" name: "--bbsplit_fasta_list" - description: "Path to comma-separated file containing a list of reference genomes\ - \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\ - \ be explicitly set to \"false\". The file should contain 2 (comma separated)\ - \ columns - short name and full path to reference genome(s)" + description: "List of reference genomes (separated by \";\") to filter reads against\ + \ with BBSplit." 
info: null must_exist: true create_parent: true required: false direction: "input" - multiple: false + multiple: true multiple_sep: ";" - type: "file" name: "--bbsplit_index" @@ -437,7 +417,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length" description: "For single-end mode only, the estimated average fragment length\ \ to use for quantification with Kallisto." @@ -446,7 +426,7 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "integer" + - type: "double" name: "--kallisto_quant_fragment_length_sd" description: "For single-end mode only, the estimated standard deviation of the\ \ fragment length for quantification with Kallisto." @@ -470,17 +450,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_salmon_quant_args" - description: "Extra arguments to pass to salmon quant command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - "-v" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "integer" name: "--min_mapped_reads" description: "Minimum percentage of uniquely mapped reads below which samples\ @@ -530,18 +499,6 @@ argument_groups: description: "Skip all of the pseudo-alignment-based processes within the pipeline." info: null direction: "input" - - type: "string" - name: "--extra_rsem_calculate_expression_args" - description: "Extra arguments to pass to rsem-calculate-expression command in\ - \ addition to defaults defined by the pipeline." 
- info: null - default: - - "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed\ - \ 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Process skipping options" arguments: - type: "boolean" @@ -636,17 +593,6 @@ argument_groups: direction: "input" - name: "Other process arguments" arguments: - - type: "string" - name: "--extra_fq_subsample_args" - description: "Extra arguments to pass to fq subsample command in addition to defaults\ - \ defined by the pipeline." - info: null - default: - - " --record-count 1000000 --seed 1" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--extra_picard_args" description: "Extra arguments to pass to picard MarkDuplicates command in addition\ @@ -659,17 +605,6 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" - - type: "string" - name: "--extra_bedtools_args" - description: "Extra arguments to pass to bedtools genomecov command in addition\ - \ to defaults defined by the pipeline." - info: null - default: - - " -split -du" - required: false - direction: "input" - multiple: false - multiple_sep: ";" - type: "string" name: "--extra_preseq_args" description: "Extra arguments to pass to preseq lc_extrap command in addition\ @@ -840,7 +775,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "fastq/$id.read_1.fastq.gz" + - "fastq/${id}_r1.fastq.gz" must_exist: false create_parent: true required: false @@ -852,7 +787,7 @@ argument_groups: description: "Path to output directory" info: null default: - - "fastq/$id.read_2.fastq.gz" + - "fastq/${id}_r2.fastq.gz" must_exist: false create_parent: true required: false @@ -864,7 +799,7 @@ argument_groups: description: "FastQC HTML report for read 1." 
info: null default: - - "fastqc_raw/$id.read_1.fastqc.html" + - "fastqc_raw/${id}_r1.fastqc.html" must_exist: false create_parent: true required: false @@ -876,7 +811,7 @@ argument_groups: description: "FastQC HTML report for read 2." info: null default: - - "fastqc_raw/$id.read_2.fastqc.html" + - "fastqc_raw/${id}_r2.fastqc.html" must_exist: false create_parent: true required: false @@ -888,7 +823,7 @@ argument_groups: description: "FastQC report archive for read 1." info: null default: - - "fastqc_raw/$id.read_1.fastqc.zip" + - "fastqc_raw/${id}_r1.fastqc.zip" must_exist: false create_parent: true required: false @@ -900,7 +835,7 @@ argument_groups: description: "FastQC report archive for read 2." info: null default: - - "fastqc_raw/$id.read_2.fastqc.zip" + - "fastqc_raw/${id}_r2.fastqc.zip" must_exist: false create_parent: true required: false @@ -911,7 +846,7 @@ argument_groups: name: "--trim_html_1" info: null default: - - "fastqc_trim/$id.read_1.trimmed_fastqc.html" + - "fastqc_trim/${id}_r1.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -922,7 +857,7 @@ argument_groups: name: "--trim_html_2" info: null default: - - "fastqc_trim/$id.read_2.trimmed_fastqc.html" + - "fastqc_trim/${id}_r2.trimmed_fastqc.html" must_exist: false create_parent: true required: false @@ -933,7 +868,7 @@ argument_groups: name: "--trim_zip_1" info: null default: - - "fastqc_trim/$id.read_1.trimmed_fastqc.zip" + - "fastqc_trim/${id}_r1.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -944,7 +879,7 @@ argument_groups: name: "--trim_zip_2" info: null default: - - "fastqc_trim/$id.read_2.trimmed_fastqc.zip" + - "fastqc_trim/${id}_r2.trimmed_fastqc.zip" must_exist: false create_parent: true required: false @@ -955,7 +890,7 @@ argument_groups: name: "--trim_log_1" info: null default: - - "trimgalore/$id.read_1.trimming_report.txt" + - "trimgalore/${id}_r1.trimming_report.txt" must_exist: false create_parent: true required: false @@ 
-966,7 +901,7 @@ argument_groups: name: "--trim_log_2" info: null default: - - "trimgalore/$id.read_2.trimming_report.txt" + - "trimgalore/${id}_r2.trimming_report.txt" must_exist: false create_parent: true required: false @@ -1833,21 +1768,35 @@ argument_groups: multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_pdf" + name: "--qualimap_qc_report" + description: "Text file containing the RNAseq QC results." info: null default: - - "qualimap/$id.qualimap_output.pdf" - must_exist: false + - "Qualimap/$id.rnaseq_qc_results.txt" + must_exist: true create_parent: true required: false direction: "output" multiple: false multiple_sep: ";" - type: "file" - name: "--qualimap_output_dir" + name: "--qualimap_counts" + description: "Output file for computed counts." info: null default: - - "qualimap/$id" + - "Qualimap/$id.counts.txt" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--qualimap_report" + description: "Report output file. Supported formats are PDF or HTML." 
+ info: null + default: + - "Qualimap/$id.report.html" must_exist: true create_parent: true required: false @@ -2031,7 +1980,7 @@ dependencies: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" @@ -2112,8 +2061,8 @@ build_info: output: "target/nextflow/workflows/rnaseq" executable: "target/nextflow/workflows/rnaseq/main.nf" viash_version: "0.9.0" - git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3" - git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + git_commit: "0c8a7eb648edb0567b7860756b79dfbccbbac27b" + git_remote: "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" dependencies: - "target/nextflow/workflows/prepare_genome" - "target/nextflow/cat_fastq" @@ -2132,7 +2081,7 @@ package_config: repositories: - type: "vsh" name: "biobox" - repo: "vsh/biobox" + repo: "biobox" tag: "main" - type: "vsh" name: "craftbox" diff --git a/target/nextflow/workflows/rnaseq/main.nf b/target/nextflow/workflows/rnaseq/main.nf index 46e808f..a82ba0e 100644 --- a/target/nextflow/workflows/rnaseq/main.nf +++ b/target/nextflow/workflows/rnaseq/main.nf @@ -3073,24 +3073,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "--extra_trimgalore_args", - "description" : "Extra arguments to pass to Trim Galore! 
command in addition to defaults defined by the pipeline.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "string", - "name" : "--extra_fastp_args", - "description" : "Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.", - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "integer", "name" : "--min_trimmed_reads", @@ -3111,12 +3093,12 @@ meta = [ { "type" : "file", "name" : "--bbsplit_fasta_list", - "description" : "Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \\"--skip_bbsplit\\" must be explicitly set to \\"false\\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)", + "description" : "List of reference genomes (separated by \\";\\") to filter reads against with BBSplit.", "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "input", - "multiple" : false, + "multiple" : true, "multiple_sep" : ";" }, { @@ -3300,7 +3282,7 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "integer", + "type" : "double", "name" : "--kallisto_quant_fragment_length", "description" : "For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.", "required" : false, @@ -3309,7 +3291,7 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "integer", + "type" : "double", "name" : "--kallisto_quant_fragment_length_sd", "description" : "For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.", "required" : false, @@ -3332,18 +3314,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "--extra_salmon_quant_args", - "description" : "Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.", - "default" : [ - 
"-v" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "integer", "name" : "--min_mapped_reads", @@ -3397,18 +3367,6 @@ meta = [ "name" : "--skip_pseudo_alignment", "description" : "Skip all of the pseudo-alignment-based processes within the pipeline.", "direction" : "input" - }, - { - "type" : "string", - "name" : "--extra_rsem_calculate_expression_args", - "description" : "Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.", - "default" : [ - "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" } ] }, @@ -3528,18 +3486,6 @@ meta = [ { "name" : "Other process arguments", "arguments" : [ - { - "type" : "string", - "name" : "--extra_fq_subsample_args", - "description" : "Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.", - "default" : [ - " --record-count 1000000 --seed 1" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "string", "name" : "--extra_picard_args", @@ -3552,18 +3498,6 @@ meta = [ "multiple" : false, "multiple_sep" : ";" }, - { - "type" : "string", - "name" : "--extra_bedtools_args", - "description" : "Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.", - "default" : [ - " -split -du" - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, { "type" : "string", "name" : "--extra_preseq_args", @@ -3767,7 +3701,7 @@ meta = [ "name" : "--output_fastq_1", "description" : "Path to output directory", "default" : [ - "fastq/$id.read_1.fastq.gz" + "fastq/${id}_r1.fastq.gz" ], "must_exist" : false, "create_parent" : true, @@ -3781,7 +3715,7 @@ meta = [ "name" : "--output_fastq_2", "description" : "Path to 
output directory", "default" : [ - "fastq/$id.read_2.fastq.gz" + "fastq/${id}_r2.fastq.gz" ], "must_exist" : false, "create_parent" : true, @@ -3795,7 +3729,7 @@ meta = [ "name" : "--fastqc_html_1", "description" : "FastQC HTML report for read 1.", "default" : [ - "fastqc_raw/$id.read_1.fastqc.html" + "fastqc_raw/${id}_r1.fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3809,7 +3743,7 @@ meta = [ "name" : "--fastqc_html_2", "description" : "FastQC HTML report for read 2.", "default" : [ - "fastqc_raw/$id.read_2.fastqc.html" + "fastqc_raw/${id}_r2.fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3823,7 +3757,7 @@ meta = [ "name" : "--fastqc_zip_1", "description" : "FastQC report archive for read 1.", "default" : [ - "fastqc_raw/$id.read_1.fastqc.zip" + "fastqc_raw/${id}_r1.fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3837,7 +3771,7 @@ meta = [ "name" : "--fastqc_zip_2", "description" : "FastQC report archive for read 2.", "default" : [ - "fastqc_raw/$id.read_2.fastqc.zip" + "fastqc_raw/${id}_r2.fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3850,7 +3784,7 @@ meta = [ "type" : "file", "name" : "--trim_html_1", "default" : [ - "fastqc_trim/$id.read_1.trimmed_fastqc.html" + "fastqc_trim/${id}_r1.trimmed_fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3863,7 +3797,7 @@ meta = [ "type" : "file", "name" : "--trim_html_2", "default" : [ - "fastqc_trim/$id.read_2.trimmed_fastqc.html" + "fastqc_trim/${id}_r2.trimmed_fastqc.html" ], "must_exist" : false, "create_parent" : true, @@ -3876,7 +3810,7 @@ meta = [ "type" : "file", "name" : "--trim_zip_1", "default" : [ - "fastqc_trim/$id.read_1.trimmed_fastqc.zip" + "fastqc_trim/${id}_r1.trimmed_fastqc.zip" ], "must_exist" : false, "create_parent" : true, @@ -3889,7 +3823,7 @@ meta = [ "type" : "file", "name" : "--trim_zip_2", "default" : [ - "fastqc_trim/$id.read_2.trimmed_fastqc.zip" + "fastqc_trim/${id}_r2.trimmed_fastqc.zip" ], 
"must_exist" : false, "create_parent" : true, @@ -3902,7 +3836,7 @@ meta = [ "type" : "file", "name" : "--trim_log_1", "default" : [ - "trimgalore/$id.read_1.trimming_report.txt" + "trimgalore/${id}_r1.trimming_report.txt" ], "must_exist" : false, "create_parent" : true, @@ -3915,7 +3849,7 @@ meta = [ "type" : "file", "name" : "--trim_log_2", "default" : [ - "trimgalore/$id.read_2.trimming_report.txt" + "trimgalore/${id}_r2.trimming_report.txt" ], "must_exist" : false, "create_parent" : true, @@ -4885,7 +4819,7 @@ meta = [ { "type" : "file", "name" : "--dupradar_output_duprate_exp_denscurve_mqc", - "description''' + '''" : "path to output file (pdf) of density curve of gene duplication multiqc", + "description" : "path to output file (pdf) of density curve of gene duplication multiqc", "default" : [ "dupradar/density_curve/$id.duprate_exp_density_curve_mqc.pdf" ], @@ -4925,11 +4859,12 @@ meta = [ }, { "type" : "file", - "name" : "--qualimap_output_pdf", + "name" : "--qualimap_qc_report", + "description" : "Text file containing the RNAseq QC results.", "default" : [ - "qualimap/$id.qualimap_output.pdf" + "Qualimap/$id.rnaseq_qc_results.txt" ], - "must_exist" : false, + "must_exist" : true, "create_parent" : true, "required" : false, "direction" : "output", @@ -4938,9 +4873,24 @@ meta = [ }, { "type" : "file", - "name" : "--qualimap_output_dir", + "name" : "--qualimap_counts", + "description" : "Output file for computed counts.", "default" : [ - "qualimap/$id" + "Qualimap/$id.counts.txt" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--qualimap_report", + "description" : "Report output file. 
Supported formats are PDF or HTML.", + "default" : [ + "Qualimap/$id.report.html" ], "must_exist" : true, "create_parent" : true, @@ -4952,7 +4902,7 @@ meta = [ { "type" : "file", "name" : "--deseq2_output", - "default" : [ + ''' + ''' "default" : [ "deseq2_qc" ], "must_exist" : true, @@ -5181,7 +5131,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -5277,8 +5227,8 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/rnaseq", "viash_version" : "0.9.0", - "git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3", - "git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq" + "git_commit" : "0c8a7eb648edb0567b7860756b79dfbccbbac27b", + "git_remote" : "https://x-access-token:ghs_7sVTZt0nXOOC3HSd5RqHBhwAcGDp1W3pcOby@github.com/viash-hub/rnaseq" }, "package_config" : { "name" : "rnaseq", @@ -5295,7 +5245,7 @@ meta = [ { "type" : "vsh", "name" : "biobox", - "repo" : "vsh/biobox", + "repo" : "biobox", "tag" : "main" }, { @@ -5752,8 +5702,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": "dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "featurecounts": "featurecounts", "featurecounts_summary": "featurecounts_summary", "featurecounts_multiqc": "featurecounts_multiqc", @@ -5867,8 +5818,9 @@ workflow run_wf { "dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc", "dupradar_output_expression_histogram": "dupradar_output_expression_histogram", "dupradar_output_intercept_slope": 
"dupradar_output_intercept_slope", - "qualimap_output_dir": "qualimap_output_dir", - "qualimap_output_pdf": "qualimap_output_pdf", + "qualimap_report": "qualimap_report", + "qualimap_qc_report": "qualimap_qc_report", + "qualimap_counts": "qualimap_counts", "tpm_gene": "tpm_gene", "counts_gene": "counts_gene", "counts_gene_length_scaled": "counts_gene_length_scaled", diff --git a/target/nextflow/workflows/rnaseq/nextflow_schema.json b/target/nextflow/workflows/rnaseq/nextflow_schema.json index 3ecaac1..35f6782 100644 --- a/target/nextflow/workflows/rnaseq/nextflow_schema.json +++ b/target/nextflow/workflows/rnaseq/nextflow_schema.json @@ -264,26 +264,6 @@ } - , - "extra_trimgalore_args": { - "type": - "string", - "description": "Type: `string`. Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`. Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline." - - } - - - , - "extra_fastp_args": { - "type": - "string", - "description": "Type: `string`. Extra arguments to pass to fastp command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`. Extra arguments to pass to fastp command in addition to defaults defined by the pipeline." - - } - - , "min_trimmed_reads": { "type": @@ -309,8 +289,8 @@ "bbsplit_fasta_list": { "type": "string", - "description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit", - "help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must be explicitly set to \"false\". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)" + "description": "Type: List of `file`, multiple_sep: `\";\"`. 
List of reference genomes (separated by \";\") to filter reads against with BBSplit", + "help_text": "Type: List of `file`, multiple_sep: `\";\"`. List of reference genomes (separated by \";\") to filter reads against with BBSplit." } @@ -499,9 +479,9 @@ , "kallisto_quant_fragment_length": { "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", - "help_text": "Type: `integer`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." + "number", + "description": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated average fragment length to use for quantification with Kallisto." } @@ -509,9 +489,9 @@ , "kallisto_quant_fragment_length_sd": { "type": - "integer", - "description": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", - "help_text": "Type: `integer`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto." + "number", + "description": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto", + "help_text": "Type: `double`. For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto." } @@ -537,17 +517,6 @@ } - , - "extra_salmon_quant_args": { - "type": - "string", - "description": "Type: `string`, default: `-v`. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: `-v`. Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline." 
- , - "default":"-v" - } - - , "min_mapped_reads": { "type": @@ -625,17 +594,6 @@ } - , - "extra_rsem_calculate_expression_args": { - "type": - "string", - "description": "Type: `string`, default: `--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1`. Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: `--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1`. Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline." - , - "default":"--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" - } - - } }, @@ -822,17 +780,6 @@ "properties": { - "extra_fq_subsample_args": { - "type": - "string", - "description": "Type: `string`, default: ` --record-count 1000000 --seed 1`. Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: ` --record-count 1000000 --seed 1`. Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline." - , - "default":" --record-count 1000000 --seed 1" - } - - - , "extra_picard_args": { "type": "string", @@ -843,17 +790,6 @@ } - , - "extra_bedtools_args": { - "type": - "string", - "description": "Type: `string`, default: ` -split -du`. Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline", - "help_text": "Type: `string`, default: ` -split -du`. Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline." - , - "default":" -split -du" - } - - , "extra_preseq_args": { "type": @@ -1974,24 +1910,35 @@ , - "qualimap_output_pdf": { + "qualimap_qc_report": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_output_pdf.pdf`. 
", - "help_text": "Type: `file`, default: `$id.$key.qualimap_output_pdf.pdf`. " + "description": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`. Text file containing the RNAseq QC results", + "help_text": "Type: `file`, default: `$id.$key.qualimap_qc_report.txt`. Text file containing the RNAseq QC results." , - "default":"$id.$key.qualimap_output_pdf.pdf" + "default":"$id.$key.qualimap_qc_report.txt" } , - "qualimap_output_dir": { + "qualimap_counts": { "type": "string", - "description": "Type: `file`, default: `$id.$key.qualimap_output_dir.qualimap_output_dir`. ", - "help_text": "Type: `file`, default: `$id.$key.qualimap_output_dir.qualimap_output_dir`. " + "description": "Type: `file`, default: `$id.$key.qualimap_counts.txt`. Output file for computed counts", + "help_text": "Type: `file`, default: `$id.$key.qualimap_counts.txt`. Output file for computed counts." , - "default":"$id.$key.qualimap_output_dir.qualimap_output_dir" + "default":"$id.$key.qualimap_counts.txt" + } + + + , + "qualimap_report": { + "type": + "string", + "description": "Type: `file`, default: `$id.$key.qualimap_report.html`. Report output file", + "help_text": "Type: `file`, default: `$id.$key.qualimap_report.html`. Report output file. Supported formats are PDF or HTML." + , + "default":"$id.$key.qualimap_report.html" }