Build branch main with version main (0c8a7eb)

Build pipeline: viash-hub.rnaseq.main-nn8dl

Source commit: 0c8a7eb648

Source message: remove citation
This commit is contained in:
CI
2024-11-27 11:54:48 +00:00
parent 14e0d12189
commit 93ac6aad2e
325 changed files with 30328 additions and 46408 deletions

View File

@@ -17,7 +17,7 @@ config_mods: |
repositories:
- name: biobox
type: vsh
repo: vsh/biobox
repo: biobox
tag: main
- name: craftbox
type: vsh

View File

@@ -1,89 +0,0 @@
# Viash component wrapping BBSplit from the BBMap suite. Ported from the
# nf-core/rnaseq module listed under info.migration_info.
name: "bbmap_bbsplit"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/bbmap/bbsplit/main.nf, modules/nf-core/bbmap/bbsplit/meta.yml]
    last_sha: 277bd337739a8b8f753fa7b5eda6743b9b6acb89
description: |
  Split sequencing reads by mapping them to multiple references simultaneously.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--id"
        type: string
        description: Sample ID
      - name: "--paired"
        type: boolean
        default: false
        description: Paired fastq files or not?
      - name: "--input"
        type: file
        multiple: true
        multiple_sep: ","
        description: Input fastq files, either one or two (paired)
        example: sample.fastq
      - name: "--primary_ref"
        type: file
        description: Primary reference FASTA
      # script.sh reads this file as "<name>,<path>" lines.
      - name: "--bbsplit_fasta_list"
        type: file
        description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit.
      - name: "--only_build_index"
        type: boolean
        description: true = only build index; false = mapping
      - name: "--built_bbsplit_index"
        type: file
        description: Directory with index files

  - name: "Output"
    arguments:
      - name: "--fastq_1"
        type: file
        required: false
        description: Output file for read 1.
        direction: output
        must_exist: false
        default: $id.$key.read_1.fastq
      - name: "--fastq_2"
        type: file
        required: false
        must_exist: false
        description: Output file for read 2.
        direction: output
        default: $id.$key.read_2.fastq
      - name: "--bbsplit_index"
        type: file
        description: Directory with index files
        direction: output
        must_exist: false
        default: BBSplit_index

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/reference/genome.fasta
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
  - path: /testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa
  - path: /testData/minimal_test/reference/bbsplit_fasta/human.fa

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: docker
        # ca-certificates is installed explicitly so that wget can verify the
        # TLS certificate of the download server instead of skipping the check.
        run: |
          apt-get update && \
          apt-get install -y build-essential openjdk-17-jdk wget tar ca-certificates && \
          wget https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \
          tar xzf BBMap_39.01.tar.gz && \
          cp -r bbmap/* /usr/local/bin

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,65 +0,0 @@
#!/bin/bash

# Wrapper around bbsplit.sh with two modes:
#   * $par_only_build_index evaluates to true: build a BBSplit index from the
#     primary reference plus the references listed in --bbsplit_fasta_list.
#   * otherwise: split the input reads and copy the reads binned to the
#     primary reference to --fastq_1 (and --fastq_2 for paired input).

set -eo pipefail

# Remove the scratch directory on exit (tmpdir is only set in mapping mode).
function clean_up {
  rm -rf "$tmpdir"
}
trap clean_up EXIT

# Java heap size in MB handed to BBSplit via -Xmx.
avail_mem=3072

# Without a prebuilt index, turn every "<name>,<path>" line of the list file
# into a "ref_<name>=<path>" argument. A missing list file leaves the array
# empty, which is reported by the explicit checks below.
other_refs=()
if [ ! -d "$par_built_bbsplit_index" ] && [ -f "$par_bbsplit_fasta_list" ]; then
  # "|| [ -n "$name" ]" keeps a last line that has no trailing newline.
  while IFS="," read -r name path || [ -n "$name" ]; do
    [ -n "$name" ] || continue   # skip blank lines
    other_refs+=("ref_$name=$path")
  done < "$par_bbsplit_fasta_list"
fi

if $par_only_build_index; then
  if [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
    bbsplit.sh \
      -Xmx${avail_mem}M \
      ref_primary="$par_primary_ref" "${other_refs[@]}" \
      path="$par_bbsplit_index" \
      threads=${meta_cpus:-1}
  else
    echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." >&2
    # Fail instead of reporting success without having built an index.
    exit 1
  fi
else
  IFS="," read -ra input <<< "$par_input"
  tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

  # Either point BBSplit at a prebuilt index or hand it the reference FASTAs.
  # An array keeps every reference a separate, correctly quoted argument.
  index_args=()
  if [ -d "$par_built_bbsplit_index" ]; then
    index_args=("path=$par_built_bbsplit_index")
  elif [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
    index_args=("ref_primary=$par_primary_ref" "${other_refs[@]}")
  else
    echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." >&2
    # Do not run BBSplit without any reference information.
    exit 1
  fi

  if $par_paired; then
    bbsplit.sh \
      -Xmx${avail_mem}M \
      "${index_args[@]}" \
      threads=${meta_cpus:-1} \
      in="${input[0]}" \
      in2="${input[1]}" \
      basename="${tmpdir}/%_#.fastq" \
      refstats=bbsplit_stats.txt
    # Patterns are quoted so the shell cannot expand them against the
    # current directory before find sees them.
    read1=$(find "$tmpdir/" -iname 'primary_1*')
    read2=$(find "$tmpdir/" -iname 'primary_2*')
    cp "$read1" "$par_fastq_1"
    cp "$read2" "$par_fastq_2"
  else
    bbsplit.sh \
      -Xmx${avail_mem}M \
      "${index_args[@]}" \
      threads=${meta_cpus:-1} \
      in="${input[0]}" \
      basename="${tmpdir}/%.fastq" \
      refstats=bbsplit_stats.txt
    read1=$(find "$tmpdir/" -iname 'primary*')
    cp "$read1" "$par_fastq_1"
  fi
fi

View File

@@ -1,86 +0,0 @@
#!/bin/bash

# Integration test for the bbmap_bbsplit component: builds an index, then runs
# single-end and paired-end splitting both from FASTA references and from the
# prebuilt index. Output files are removed after every case so a later case
# cannot pass on a file left behind by an earlier one.

echo ">>> Test $meta_functionality_name"

# Reference list consumed by --bbsplit_fasta_list: one "<name>,<path>" per line.
cat > bbsplit_fasta_list.txt << HERE
sarscov2,$meta_resources_dir/sarscov2.fa
human,$meta_resources_dir/human.fa
HERE

echo ">>> Building BBSplit index"
"$meta_executable" \
  --primary_ref "$meta_resources_dir/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --only_build_index true \
  --bbsplit_index "BBSplit_index"

echo ">>> Check whether output exists"
[ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1
[ -z "$(ls -A 'BBSplit_index')" ] && echo "BBSplit index is empty!" && exit 1

echo ">>> Filtering ribosomal RNA reads"

echo ">>> Testing with single-end reads and primary/non-primary FASTA files"
"$meta_executable" \
  --paired false \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \
  --only_build_index false \
  --primary_ref "$meta_resources_dir/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz"

echo ">>> Check whether output exists"
[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file is empty!" && exit 1

rm filtered_SRR6357070_1.fastq.gz

echo ">>> Testing with paired-end reads and primary/non-primary FASTA files"
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --only_build_index false \
  --primary_ref "$meta_resources_dir/genome.fasta" \
  --bbsplit_fasta_list "bbsplit_fasta_list.txt" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz" \
  --fastq_2 "filtered_SRR6357070_2.fastq.gz"

echo ">>> Check whether output exists"
[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file is empty!" && exit 1
[ ! -f "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file is empty!" && exit 1

rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz

echo ">>> Testing with single-end reads and BBSplit index"
"$meta_executable" \
  --paired false \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \
  --only_build_index false \
  --built_bbsplit_index "BBSplit_index" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz"

echo ">>> Check whether output exists"
[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered reads file is empty!" && exit 1

# Remove the single-end result; the paired-end case below writes the same
# file name and must not be able to pass on this leftover.
rm filtered_SRR6357070_1.fastq.gz

echo ">>> Testing with paired-end reads and BBSplit index"
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --only_build_index false \
  --built_bbsplit_index "BBSplit_index" \
  --fastq_1 "filtered_SRR6357070_1.fastq.gz" \
  --fastq_2 "filtered_SRR6357070_2.fastq.gz"

echo ">>> Check whether output exists"
[ ! -f "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_1.fastq.gz" ] && echo "Filtered read 1 file is empty!" && exit 1
[ ! -f "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file does not exist!" && exit 1
[ ! -s "filtered_SRR6357070_2.fastq.gz" ] && echo "Filtered read 2 file is empty!" && exit 1

rm filtered_SRR6357070_1.fastq.gz filtered_SRR6357070_2.fastq.gz

echo "All tests succeeded!"
exit 0

View File

@@ -1,71 +0,0 @@
# Viash component running FastQC on one (single-end) or two (paired-end)
# FASTQ files; produces an HTML report and a zip archive per read.
name: fastqc
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths:
      - modules/nf-core/fastqc/main.nf
      - modules/nf-core/fastqc/meta.yml
    last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
description: |
  Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.

argument_groups:
  - name: Input
    arguments:
      - name: --paired
        type: boolean
        required: false
        default: false
        description: Paired fastq files or not?
      # script.sh requires exactly two files when --paired is true and
      # exactly one otherwise.
      - name: --input
        type: file
        required: true
        multiple: true
        multiple_sep: ','
        example: sample.fastq
        description: Input fastq files, either one or two (paired)

  - name: Output
    arguments:
      - name: --fastqc_html_1
        type: file
        direction: output
        default: $id.read_1.fastqc.html
        description: FastQC HTML report for read 1.
      # Read-2 outputs are only written for paired input, hence optional.
      - name: --fastqc_html_2
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.read_2.fastqc.html
        description: FastQC HTML report for read 2.
      - name: --fastqc_zip_1
        type: file
        direction: output
        default: $id.read_1.fastqc.zip
        description: FastQC report archive for read 1.
      - name: --fastqc_zip_2
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.read_2.fastqc.zip
        description: FastQC report archive for read 2.

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages:
          - fastqc

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,41 +0,0 @@
#!/bin/bash

# Run FastQC on one (single-end) or two (paired-end) FASTQ files and copy the
# HTML report and the zip archive of each read to the requested output paths.

set -eo pipefail

# Remove the scratch directory when the script exits, also on failure.
function clean_up {
  rm -rf "$tmpdir"
}
trap clean_up EXIT

tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX")

IFS="," read -ra input <<< "$par_input"
count=${#input[@]}

if $par_paired; then
  echo "Paired - $count"
  if [ "$count" -ne 2 ]; then
    echo "Paired end input requires two files"
    exit 1
  fi
else
  echo "Not Paired - $count"
  if [ "$count" -ne 1 ]; then
    echo "Single end input requires one file"
    exit 1
  fi
fi

# Run FastQC on a single FASTQ file and copy its reports.
#   $1  input FASTQ file
#   $2  destination of the HTML report
#   $3  destination of the zip archive
# Each file gets a private output directory, so the reports are found by
# globbing rather than by re-deriving FastQC's output name from the input
# name. That keeps working for inputs named *.fq, *.fq.gz, and so on.
function run_fastqc {
  local fastq="$1" html_dest="$2" zip_dest="$3"
  local outdir report

  outdir=$(mktemp -d "$tmpdir/fastqc-XXXXXXXX")
  fastqc -o "$outdir" "$fastq"

  # "if" rather than "[[ ]] && cp": a missing report is skipped without
  # turning into a non-zero exit status of the script.
  for report in "$outdir"/*_fastqc.html; do
    if [[ -e "$report" ]]; then
      cp "$report" "$html_dest"
    fi
  done
  for report in "$outdir"/*_fastqc.zip; do
    if [[ -e "$report" ]]; then
      cp "$report" "$zip_dest"
    fi
  done
}

run_fastqc "${input[0]}" "$par_fastqc_html_1" "$par_fastqc_zip_1"

if $par_paired; then
  run_fastqc "${input[1]}" "$par_fastqc_html_2" "$par_fastqc_zip_2"
fi

View File

@@ -1,35 +0,0 @@
#!/bin/bash

# Smoke test for the fastqc component: one paired-end and one single-end run,
# each followed by checks that the requested report files were written.

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --paired true \
  --input $meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz \
  --fastqc_html_1 SRR6357070_1.html \
  --fastqc_html_2 SRR6357070_2.html \
  --fastqc_zip_1 SRR6357070_1.zip \
  --fastqc_zip_2 SRR6357070_2.zip

echo ">> Checking if the correct files are present"
if [[ ! -f "SRR6357070_1.html" || ! -f "SRR6357070_2.html" ]]; then
  echo "Report file missing"
  exit 1
fi
if [[ ! -s "SRR6357070_1.html" || ! -s "SRR6357070_2.html" ]]; then
  echo "Report file empty"
  exit 1
fi
if [[ ! -f "SRR6357070_1.zip" || ! -f "SRR6357070_2.zip" ]]; then
  echo "Zip file missing"
  exit 1
fi

# Clean up so the single-end checks below cannot see these files.
rm SRR6357070_1.html SRR6357070_2.html SRR6357070_1.zip SRR6357070_2.zip

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --paired false \
  --input $meta_resources_dir/SRR6357070_1.fastq.gz \
  --fastqc_html_1 SRR6357070_1.html \
  --fastqc_zip_1 SRR6357070_1.zip

echo ">> Checking if the correct files are present"
if [ ! -f "SRR6357070_1.html" ]; then
  echo "Report file missing"
  exit 1
fi
if [ ! -s "SRR6357070_1.html" ]; then
  echo "Report file empty"
  exit 1
fi
if [ ! -f "SRR6357070_1.zip" ]; then
  echo "Zip file missing"
  exit 1
fi

echo ">>> Test finished successfully"
exit 0

View File

@@ -1,66 +0,0 @@
# Viash component wrapping `fq subsample` for single or paired FASTQ files.
name: "fq_subsample"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/fq/subsample/main.nf, modules/nf-core/fq/subsample/meta.yml]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
description: |
  fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args

argument_groups:
  - name: "Input"
    arguments:
      # script.sh accepts one or two files, separated by ";".
      - name: "--input"
        type: file
        description: Input fastq files to subsample
        multiple: true
        multiple_sep: ";"
      - name: "--extra_args"
        type: string
        default: ""
        description: Extra arguments to pass to fq subsample

  # This group holds the output files; it was previously a second group
  # labelled "Input".
  - name: "Output"
    arguments:
      - name: "--output_1"
        type: file
        direction: output
        default: $id.read_1.subsampled.fastq
        description: Sampled read 1 fastq files
      - name: "--output_2"
        type: file
        must_exist: false
        direction: output
        default: $id.read_2.subsampled.fastq
        description: Sampled read 2 fastq files

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      # Builds fq v0.12.0 from source with a rustup-installed toolchain and
      # places the binary in /usr/local/bin.
      - type: docker
        env:
          - TZ=Europe/Brussels
        run: |
          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
          apt-get update && \
          apt-get install -y --no-install-recommends build-essential git-all curl && \
          curl https://sh.rustup.rs -sSf | sh -s -- -y && \
          . "$HOME/.cargo/env" && \
          git clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \
          mv fq /usr/local/ && cd /usr/local/fq && \
          cargo install --locked --path . && \
          mv /usr/local/fq/target/release/fq /usr/local/bin/

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,23 +0,0 @@
#!/bin/bash

# Subsample one (single-end) or two (paired-end) FASTQ files with
# `fq subsample`. The sampling mode must be supplied through --extra_args.

set -eo pipefail

IFS=";" read -ra input <<< "$par_input"
n_fastq=${#input[@]}

# A sampling mode is mandatory: --probability (-p) or --record-count (-n).
# The check passes when one of them IS present; it was previously inverted
# and looked for a "--read-count" spelling that the error message and the
# component test do not use. Matching is done per word, so an unrelated
# option merely containing "-p" or "-n" cannot satisfy it.
has_sampling_mode=false
for arg in $par_extra_args; do
  case "$arg" in
    -p*|--probability|--probability=*|-n*|--record-count|--record-count=*)
      has_sampling_mode=true
      ;;
  esac
done

if ! $has_sampling_mode; then
  echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args"
  exit 1
fi

# $par_extra_args is intentionally unquoted: it carries several
# whitespace-separated options.
if [ "$n_fastq" -eq 1 ]; then
  fq subsample $par_extra_args "${input[@]}" --r1-dst "$par_output_1"
elif [ "$n_fastq" -eq 2 ]; then
  fq subsample $par_extra_args "${input[@]}" --r1-dst "$par_output_1" --r2-dst "$par_output_2"
else
  echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!"
  exit 1
fi

View File

@@ -1,32 +0,0 @@
#!/bin/bash

# Smoke test for the fq_subsample component: a paired-end and a single-end
# run, each followed by existence and non-emptiness checks on the outputs.

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz;$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --extra_args '--record-count 1000000 --seed 1' \
  --output_1 SRR6357070_1.subsampled.fastq.gz \
  --output_2 SRR6357070_2.subsampled.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "SRR6357070_1.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file for read 1 is missing!"
  exit 1
fi
if [ ! -s "SRR6357070_1.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file is empty!"
  exit 1
fi
if [ ! -f "SRR6357070_2.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file for read 2 is missing"
  exit 1
fi
if [ ! -s "SRR6357070_2.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file is empty"
  exit 1
fi

# Remove the paired-end results before the single-end run reuses the name.
rm SRR6357070_1.subsampled.fastq.gz SRR6357070_2.subsampled.fastq.gz

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --input $meta_resources_dir/SRR6357070_1.fastq.gz \
  --extra_args '--record-count 1000000 --seed 1' \
  --output_1 SRR6357070_1.subsampled.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "SRR6357070_1.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file is missing"
  exit 1
fi
if [ ! -s "SRR6357070_1.subsampled.fastq.gz" ]; then
  echo "Subsampled FASTQ file is empty"
  exit 1
fi

echo ">>> Tests finished successfully"
exit 0

View File

@@ -1,49 +0,0 @@
# Viash component that builds a Kallisto index from a transcriptome FASTA.
name: kallisto_index
namespace: kallisto
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/kallisto/index/main.nf, modules/nf-core/kallisto/index/meta.yml]
    last_sha: c0816976384d5e7ee6079c29c45958df1ffa0ee4
description: |
  Create Kallisto index.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--transcriptome_fasta"
        type: file
        description: Transcriptome FASTA file to build the index from.
      # Optional; script.sh only passes -k to kallisto when this is set.
      - name: "--pseudo_aligner_kmer_size"
        type: integer
        description: Kmer length passed to indexing step of pseudoaligners.

  - name: "Output"
    arguments:
      - name: "--kallisto_index"
        type: file
        direction: output
        default: Kallisto_index

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/reference/transcriptome.fasta

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: docker
        # ca-certificates is installed next to wget so the release archive is
        # downloaded over verified TLS; with --no-install-recommends it is
        # not pulled in automatically.
        run: |
          apt-get update && \
          apt-get install -y --no-install-recommends wget ca-certificates && \
          wget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
          tar -xzf kallisto_linux-v0.50.1.tar.gz && \
          mv kallisto/kallisto /usr/local/bin/

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,8 +0,0 @@
#!/bin/bash

# Build a Kallisto index from the transcriptome FASTA.

set -eo pipefail

# -k is only passed when a k-mer size was provided. Paths are quoted so that
# file names containing spaces reach kallisto as single arguments.
kallisto index \
  ${par_pseudo_aligner_kmer_size:+-k "$par_pseudo_aligner_kmer_size"} \
  -i "$par_kallisto_index" \
  "$par_transcriptome_fasta"

View File

@@ -1,14 +0,0 @@
#!/bin/bash

# Smoke test for kallisto_index: build an index from the test transcriptome
# and verify that a non-empty index file was written.

echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --transcriptome_fasta "$meta_resources_dir/transcriptome.fasta" \
  --kallisto_index Kallisto

echo ">>> Checking whether output exists"
if [ ! -f "Kallisto" ]; then
  echo "Kallisto index does not exist!"
  exit 1
fi
if [ ! -s "Kallisto" ]; then
  echo "Kallisto index is empty!"
  exit 1
fi

echo "All tests succeeded!"
exit 0

View File

@@ -1,88 +0,0 @@
# Viash component wrapping `kallisto quant`.
name: kallisto_quant
namespace: kallisto
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/kallisto/quant/main.nf, modules/nf-core/kallisto/quant/meta.yml]
    last_sha: aff1d2e02717247831644769fc3ba84868c3fdde
description: |
  Computes equivalence classes for reads and quantifies abundances.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--input"
        type: file
        multiple: true
        multiple_sep: ","
        description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
      - name: "--paired"
        type: boolean
        description: Paired reads or not.
      # script.sh maps "forward" / "reverse" to --fr-stranded / --rf-stranded.
      - name: "--strandedness"
        type: string
        description: Sample strand-specificity.
      - name: "--index"
        type: file
        description: Kallisto genome index.
      - name: "--gtf"
        type: file
        description: Optional gtf file for translation of transcripts into genomic coordinates.
      - name: "--chromosomes"
        type: file
        description: Optional tab separated file with chromosome names and lengths.
      - name: "--fragment_length"
        type: integer
        description: For single-end mode only, the estimated average fragment length.
      - name: "--fragment_length_sd"
        type: integer
        description: For single-end mode only, the estimated standard deviation of the fragment length.
      # script.sh already reads $par_extra_args; declaring the argument makes
      # it possible to actually set it.
      - name: "--extra_args"
        type: string
        description: Extra arguments passed verbatim to kallisto quant.

  - name: "Output"
    arguments:
      - name: "--output"
        type: file
        description: Kallisto quant results
        default: "$id.kallisto_quant_results"
        direction: output
      - name: "--log"
        type: file
        description: File containing log information from running kallisto quant
        default: "$id.kallisto_quant.log.txt"
        direction: output
      - name: "--run_info"
        type: file
        description: A json file containing information about the run
        default: "$id.run_info.json"
        direction: output
      - name: "--quant_results_file"
        type: file
        description: TSV file containing abundance estimates from Kallisto
        direction: output
        default: $id.abundance.tsv

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/reference/transcriptome.fasta
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: docker
        # ca-certificates is installed next to wget so the release archive is
        # downloaded over verified TLS; with --no-install-recommends it is
        # not pulled in automatically.
        run: |
          apt-get update && \
          apt-get install -y --no-install-recommends wget ca-certificates && \
          wget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
          tar -xzf kallisto_linux-v0.50.1.tar.gz && \
          mv kallisto/kallisto /usr/local/bin/

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,40 +0,0 @@
#!/bin/bash

# Quantify transcript abundances with `kallisto quant` and distribute the
# log, run_info.json and abundance.tsv to their dedicated output paths.

set -eo pipefail

IFS="," read -ra input <<< "$par_input"

# Single-end data needs an explicit fragment length and standard deviation.
# Both values are checked to be non-negative integers; the previous check
# compared strings with "<" and read the misspelled $fragment_length_sd, so
# a missing standard deviation was never detected.
single_end_params=()
if [ "$par_paired" == "false" ]; then
  if [[ ! "$par_fragment_length" =~ ^[0-9]+$ ]] || [[ ! "$par_fragment_length_sd" =~ ^[0-9]+$ ]]; then
    echo "fragment_length and fragment_length_sd must be set for single-end data"
    exit 1
  fi
  single_end_params=(--single --fragment-length "$par_fragment_length" --sd "$par_fragment_length_sd")
fi

# Derive the strandedness flag unless the caller already passed one through
# --extra_args.
strandedness=''
if [[ "$par_extra_args" != *"--fr-stranded"* ]] && [[ "$par_extra_args" != *"--rf-stranded"* ]]; then
  if [ "$par_strandedness" == 'forward' ]; then
    strandedness='--fr-stranded'
  elif [ "$par_strandedness" == 'reverse' ]; then
    strandedness='--rf-stranded'
  fi
fi

mkdir -p "$par_output"

# stderr is duplicated into kallisto_quant.log inside the output directory.
# $strandedness and $par_extra_args are intentionally unquoted: they expand
# to zero or more separate options.
kallisto quant \
  ${meta_cpus:+--threads "$meta_cpus"} \
  --index "$par_index" \
  ${par_gtf:+--gtf "$par_gtf"} \
  ${par_chromosomes:+--chromosomes "$par_chromosomes"} \
  "${single_end_params[@]}" \
  $strandedness \
  $par_extra_args \
  -o "$par_output" \
  "${input[@]}" 2> >(tee -a "${par_output}/kallisto_quant.log" >&2)

# The log and run info are moved out of the results directory; the abundance
# table is copied so that it also stays in place.
mv "${par_output}/kallisto_quant.log" "${par_log}"
mv "${par_output}/run_info.json" "${par_run_info}"
cp "${par_output}/abundance.tsv" "${par_quant_results_file}"

View File

@@ -1,55 +0,0 @@
#!/bin/bash

# Integration test for kallisto_quant: builds an index from the test
# transcriptome, then quantifies paired-end and single-end reads and checks
# the expected result files.

echo ">>> Testing $meta_functionality_name"

echo ">>> Generating Kallisto index"
kallisto index \
  -i index \
  "$meta_resources_dir/transcriptome.fasta"

echo ">>> Testing for paired-end reads"
# FASTQ inputs are addressed through $meta_resources_dir, like the
# transcriptome above, so the test does not depend on the working directory.
"$meta_executable" \
  --index index \
  --paired true \
  --strandedness reverse \
  --output paired_end_test \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz,$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --log quant_pe.log \
  --run_info pe_run_info.json

echo ">>> Checking whether output exists"
[ ! -d "paired_end_test" ] && echo "Kallisto results do not exist!" && exit 1
[ ! -f "quant_pe.log" ] && echo "quant_pe.log does not exist!" && exit 1
[ ! -s "quant_pe.log" ] && echo "quant_pe.log is empty!" && exit 1
[ ! -f "pe_run_info.json" ] && echo "pe_run_info.json does not exist!" && exit 1
[ ! -s "pe_run_info.json" ] && echo "pe_run_info.json is empty!" && exit 1
[ ! -f "paired_end_test/abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1
[ ! -s "paired_end_test/abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1
[ ! -f "paired_end_test/abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1
[ ! -s "paired_end_test/abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --index index \
  --paired false \
  --strandedness "reverse" \
  --output single_end_test \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz" \
  --log quant_se.log \
  --run_info se_run_info.json \
  --fragment_length 101 \
  --fragment_length_sd 50

echo ">>> Checking whether output exists"
[ ! -d "single_end_test" ] && echo "Kallisto results do not exist!" && exit 1
[ ! -f "quant_se.log" ] && echo "quant_se.log does not exist!" && exit 1
[ ! -s "quant_se.log" ] && echo "quant_se.log is empty!" && exit 1
[ ! -f "se_run_info.json" ] && echo "se_run_info.json does not exist!" && exit 1
[ ! -s "se_run_info.json" ] && echo "se_run_info.json is empty!" && exit 1
[ ! -f "single_end_test/abundance.tsv" ] && echo "abundance.tsv does not exist!" && exit 1
[ ! -s "single_end_test/abundance.tsv" ] && echo "abundance.tsv is empty!" && exit 1
[ ! -f "single_end_test/abundance.h5" ] && echo "abundance.h5 does not exist!" && exit 1
[ ! -s "single_end_test/abundance.h5" ] && echo "abundance.h5 is empty!" && exit 1

echo "All tests succeeded!"
exit 0

View File

@@ -1,118 +0,0 @@
# Viash component wrapping `qualimap rnaseq`.
name: "qualimap"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/qualimap/rnaseq/main.nf]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
description: |
  RNA-seq QC analysis using the qualimap

argument_groups:
  - name: "Input"
    arguments:
      - name: "--input"
        type: file
        required: true
        description: path to input mapping file in BAM format.
      - name: "--gtf"
        type: file
        required: true
        description: path to annotations file in Ensembl GTF format.

  - name: "Output"
    arguments:
      - name: "--output_dir"
        direction: output
        type: file
        required: false
        default: $id.qualimap_output
        description: path to output directory for raw data and report.
      # NOTE(review): script.sh in this component never references
      # $par_output_pdf - confirm whether this output is still needed.
      - name: "--output_pdf"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.report.pdf
        description: path to output file for pdf report.
      - name: "--output_format"
        type: string
        required: false
        default: html
        description: Format of the output report (PDF or HTML, default is HTML)

  - name: "Optional"
    arguments:
      - name: "--pr_bases"
        type: integer
        required: false
        default: 100
        min: 1
        description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).
      - name: "--tr_bias"
        type: integer
        required: false
        default: 1000
        min: 1
        description: Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).
      - name: "--algorithm"
        type: string
        required: false
        default: uniquely-mapped-reads
        description: Counting algorithm (uniquely-mapped-reads (default) or proportional).
      - name: "--sequencing_protocol"
        type: string
        required: false
        choices: ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"]
        default: non-strand-specific
        description: Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).
      - name: "--paired"
        type: boolean_true
        description: Setting this flag for paired-end experiments will result in counting fragments instead of reads.
      - name: "--sorted"
        type: boolean_true
        description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis.
      - name: "--java_memory_size"
        type: string
        required: false
        default: 4G
        description: maximum Java heap memory size, default = 4G.

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam
  - path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai
  - path: /testData/unit_test_resources/genes.gtf

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [ r-base, unzip, wget, openjdk-8-jdk, libxml2-dev, libcurl4-openssl-dev ]
      - type: docker
        # The install target is the absolute /usr/bin, so the step no longer
        # depends on the working directory of the build.
        # NOTE(review): "unset DISPLAY" and "export _JAVA_OPTIONS" only affect
        # the shell of this build step; if they are needed at run time they
        # have to be set through the image environment instead - confirm.
        run: |
          wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \
          unzip qualimap_v2.3.zip && \
          cp -a qualimap_v2.3/. /usr/bin && \
          unset DISPLAY && \
          mkdir -p tmp && \
          export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp
      # NOTE(review): the Bioconductor package is presumably "NOISeq", not
      # "NOISeqr" - verify before changing.
      - type: r
        bioc: [ NOISeqr ]
        cran: [ optparse ]

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,19 +0,0 @@
#!/bin/bash

# Run `qualimap rnaseq` on a BAM file and write the report to the output
# directory.

set -eo pipefail

# --paired and --sorted are flags; only an explicit "true" enables the
# corresponding qualimap switch. Expanding on any non-empty value (as
# ${par_paired:+-pe} does) would also fire for the string "false".
optional_flags=()
if [[ "$par_paired" == "true" ]]; then
  optional_flags+=(-pe)
fi
if [[ "$par_sorted" == "true" ]]; then
  optional_flags+=(-s)
fi

mkdir -p "$par_output_dir"

qualimap rnaseq \
  --java-mem-size="$par_java_memory_size" \
  --algorithm "$par_algorithm" \
  --num-pr-bases "$par_pr_bases" \
  --num-tr-bias "$par_tr_bias" \
  --sequencing-protocol "$par_sequencing_protocol" \
  -bam "$par_input" \
  -gtf "$par_gtf" \
  "${optional_flags[@]}" \
  -outdir "$par_output_dir" \
  -outformat "$par_output_format"

View File

@@ -1,24 +0,0 @@
#!/bin/bash

# Smoke test for the qualimap component: run it on the test BAM/GTF and check
# that the report and the raw data folder were produced.

echo "> Running $meta_functionality_name."

# define input and output for script
input_bam="$meta_resources_dir/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
input_gtf="$meta_resources_dir/genes.gtf"
output_dir="qualimap_output"

"$meta_executable" \
  --input "$input_bam" \
  --gtf "$input_gtf" \
  --output_dir "$output_dir"

exit_code=$?
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1

echo ">> Checking whether output dir and files exists"
[ ! -d "$output_dir" ] && echo "Output dir could not be found!" && exit 1
[ ! -d "$output_dir/raw_data_qualimapReport" ] && echo "Raw data folder could not be found!" && exit 1
# The listing is quoted so that the test sees exactly one argument, whether
# the folder holds zero, one or many files.
[ -z "$(ls -A "$output_dir/raw_data_qualimapReport")" ] && echo "Raw data folder is missing output files" && exit 1
[ ! -f "$output_dir/qualimapReport.html" ] && echo "Qualimap report was not found" && exit 1
[ ! -s "$output_dir/qualimapReport.html" ] && echo "Qualimap report is empty" && exit 1

exit 0

View File

@@ -1,135 +0,0 @@
# Viash component wrapping `rsem-calculate-expression`; the image also
# contains STAR and bowtie2.
name: "rsem_calculate_expression"
namespace: "rsem"
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/rsem/calculateexpression/main.nf, modules/nf-core/rsem/calculateexpression/meta.yml]
    last_sha: 92b2a7857de1dda9d1c19a088941fc81e2976ff7
description: |
  Calculate expression with RSEM.

argument_groups:
  - name: "Input"
    arguments:
      # script.sh uses the sample ID as the RSEM output prefix.
      - name: "--id"
        type: string
        description: Sample ID.
      - name: "--strandedness"
        type: string
        description: Sample strand-specificity. Must be one of unstranded, forward, reverse
        choices: [forward, reverse, unstranded]
      - name: "--paired"
        type: boolean
        description: Paired-end reads or not?
      - name: "--input"
        type: file
        description: Input reads for quantification.
        multiple: true
        multiple_sep: ";"
      - name: "--index"
        type: file
        description: RSEM index.
      - name: "--extra_args"
        type: string
        description: Extra rsem-calculate-expression arguments in addition to the defaults.

  - name: "Output"
    arguments:
      - name: "--counts_gene"
        type: file
        description: Expression counts on gene level
        example: sample.genes.results
        direction: output
      - name: "--counts_transcripts"
        type: file
        description: Expression counts on transcript level
        example: sample.isoforms.results
        direction: output
      - name: "--stat"
        type: file
        description: RSEM statistics
        example: sample.stat
        direction: output
      - name: "--logs"
        type: file
        description: RSEM logs
        example: sample.log
        direction: output
      - name: "--bam_star"
        type: file
        description: BAM file generated by STAR (optional)
        example: sample.STAR.genome.bam
        direction: output
      - name: "--bam_genome"
        type: file
        description: Genome BAM file (optional)
        example: sample.genome.bam
        direction: output
      - name: "--bam_transcript"
        type: file
        description: Transcript BAM file (optional)
        example: sample.transcript.bam
        direction: output

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
  - path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
  - path: /testData/minimal_test/reference/rsem.tar.gz

# TODO: Install bowtie (bowtie2 is already part of the apt packages below)
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages:
          - build-essential
          # Needed so the wget downloads below can verify TLS certificates.
          - ca-certificates
          - gcc
          - g++
          - make
          - wget
          - zlib1g-dev
          - unzip
          - xxd
          - perl
          - r-base
          - bowtie2
          - python3-pip
          - git
      # Builds STAR and RSEM from their tagged source archives, installs them
      # and removes the build trees afterwards.
      - type: docker
        env:
          - STAR_VERSION=2.7.11b
          - RSEM_VERSION=1.3.3
          - TZ=Europe/Brussels
        run: |
          ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
          cd /tmp && \
          wget https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
          unzip ${STAR_VERSION}.zip && \
          cd STAR-${STAR_VERSION}/source && \
          make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
          cp STAR /usr/local/bin && \
          cd /tmp && \
          wget https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \
          unzip v${RSEM_VERSION}.zip && \
          cd RSEM-${RSEM_VERSION} && \
          make && \
          make install && \
          rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
          rm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \
          cd && \
          apt-get clean && \
          echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \
          echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \
          /bin/bash -c "source /etc/profile && source ~/.bashrc && echo $PATH && which STAR"

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,41 +0,0 @@
#!/bin/bash

# Run rsem-calculate-expression with the sample ID as output prefix, then
# move the files RSEM wrote to the requested output paths.

set -eo pipefail

# Remove the scratch directory when the script exits, also on failure.
function clean_up {
  rm -rf "$tmpdir"
}
trap clean_up EXIT

tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

# ${par_paired:+...} below fires on any non-empty value, so an explicit
# "false" has to be turned into "unset".
if [[ "$par_paired" == "false" ]]; then
  unset par_paired
fi

# Only forward/reverse are passed on; any other value adds no option.
case "$par_strandedness" in
  forward) strandedness='--strandedness forward' ;;
  reverse) strandedness='--strandedness reverse' ;;
  *)       strandedness='' ;;
esac

IFS=";" read -ra input <<< "$par_input"

# The RSEM reference prefix is the path of the *.grp file in the index
# directory without its extension.
INDEX=$(find -L "$par_index/" -name "*.grp" | sed 's/\.grp$//')

# $strandedness and $par_extra_args are intentionally unquoted: they expand
# to zero or more separate options.
rsem-calculate-expression \
  ${meta_cpus:+--num-threads "$meta_cpus"} \
  $strandedness \
  ${par_paired:+--paired-end} \
  $par_extra_args \
  "${input[@]}" \
  "$INDEX" \
  "$par_id"

# Move an RSEM result to its destination.
#   $1  file written by RSEM
#   $2  requested output path
# Nothing happens when the file was not produced, no destination was
# requested, or source and destination are the same file. Using "if" instead
# of "[[ ]] && mv" keeps an absent optional file from turning into a
# non-zero exit status of the script.
function move_if_present {
  local src="$1" dest="$2"
  if [[ -e "$src" ]] && [[ -n "$dest" ]] && [[ ! "$src" -ef "$dest" ]]; then
    mv "$src" "$dest"
  fi
}

move_if_present "${par_id}.genes.results" "$par_counts_gene"
# The source name is "<id>.isoforms.results"; the former "<id>id.isoforms.results"
# never matched, so the transcript counts were never moved.
move_if_present "${par_id}.isoforms.results" "$par_counts_transcripts"
move_if_present "${par_id}.stat" "$par_stat"
# NOTE(review): moving the log was disabled in the original; the reason is
# not visible here, so it stays disabled.
# move_if_present "${par_id}.log" "$par_logs"
move_if_present "${par_id}.STAR.genome.bam" "$par_bam_star"
move_if_present "${par_id}.genome.bam" "$par_bam_genome"
move_if_present "${par_id}.transcript.bam" "$par_bam_transcript"

View File

@@ -1,26 +0,0 @@
#!/bin/bash

# Integration test: unpack the RSEM reference shipped as a test resource, run
# the component on one paired-end sample and check the main outputs.

echo ">>> Testing $meta_functionality_name"

# Extracts into ./rsem, which is passed as --index below.
tar -xavf "$meta_resources_dir/rsem.tar.gz"

echo ">>> Calculating expression"
# --counts_transcripts matches the par_counts_transcripts variable that
# script.sh reads.
"$meta_executable" \
  --id WT_REP1 \
  --strandedness reverse \
  --paired true \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz;$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --index rsem \
  --extra_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
  --counts_gene WT_REP1.genes.results \
  --counts_transcripts WT_REP1.isoforms.results \
  --logs WT_REP1.log

echo ">>> Checking whether output exists"
[ ! -f "WT_REP1.genes.results" ] && echo "Gene level expression counts file does not exist!" && exit 1
[ ! -s "WT_REP1.genes.results" ] && echo "Gene level expression counts file is empty!" && exit 1
[ ! -f "WT_REP1.log" ] && echo "Log file does not exist!" && exit 1
[ ! -s "WT_REP1.log" ] && echo "Log file is empty!" && exit 1

echo "All tests succeeded!"
exit 0

View File

@@ -1,5 +1,4 @@
name: "rsem_merge_counts"
namespace: "rsem"
info:
migration_info:
git_repo: https://github.com/nf-core/rnaseq.git

View File

@@ -1,53 +0,0 @@
# Viash component definition for rseqc/rseqc_bamstat.
# script.sh is the entry point; test.sh exercises it against the bundled BAM.
name: 'rseqc_bamstat'
namespace: 'rseqc'

# Provenance of the nf-core module this component was migrated from.
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths:
      - modules/nf-core/rseqc/bamstat/main.nf
    last_sha: '54721c6946daf6d602d7069dc127deef9cbe6b33'

description: |
  Generate statistics from a bam file.

argument_groups:
  - name: 'Input'
    arguments:
      - name: '--input'
        type: file
        required: true
        description: input alignment file in BAM or SAM format
      - name: '--map_qual'
        type: integer
        required: false
        default: 30
        description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
        min: 0

  - name: 'Output'
    arguments:
      - name: '--output'
        type: file
        direction: output
        required: false
        default: '$id.mapping_quality.txt'
        description: output file (txt) with mapping quality statistics

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam

# Container: stock Ubuntu plus pip, then RSeQC installed from PyPI.
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages:
          - python3-pip
      - type: python
        packages:
          - RSeQC

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,8 +0,0 @@
#!/bin/bash

# Runs RSeQC's bam_stat.py on $par_input with the mapping-quality threshold
# $par_map_qual and redirects its standard output to $par_output.

set -eo pipefail

# All expansions are quoted so paths containing spaces or glob characters are
# passed through as single arguments.
bam_stat.py \
  --input "$par_input" \
  --mapq "$par_map_qual" \
  > "$par_output"

View File

@@ -1,23 +0,0 @@
#!/bin/bash

# Smoke test: run the component on the bundled BAM file and verify that a
# non-empty report is written.

# Test fixture (looked up in $meta_resources_dir) and report file name.
bam_name="test.paired_end.sorted.bam"
report="mapping_quality.txt"

echo "> Running $meta_functionality_name."
"$meta_executable" \
  --input "$meta_resources_dir/$bam_name" \
  --output "$report"
status=$?

if [[ $status != 0 ]]; then
  echo "Non zero exit code: $status"
  exit 1
fi

echo ">> Checking whether output can be found and has content"
if [ ! -f "$report" ]; then
  echo "$report file missing"
  exit 1
fi
if [ ! -s "$report" ]; then
  echo "$report file is empty"
  exit 1
fi

exit 0

View File

@@ -1,67 +0,0 @@
# Viash component definition for rseqc/rseqc_inferexperiment.
# script.sh is the entry point; test.sh runs it against the bundled BAM/BED12.
name: "rseqc_inferexperiment"
namespace: "rseqc"

# Provenance of the nf-core module this component was migrated from.
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/rseqc/inferexperiment/main.nf]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33

description: |
  Infer strandedness from sequencing reads

argument_groups:
  - name: "Input"
    arguments:
      - name: "--input"
        type: file
        required: true
        description: input alignment file in BAM or SAM format
      - name: "--refgene"
        type: file
        required: true
        description: Reference gene model in bed format
      - name: "--sample_size"
        type: integer
        required: false
        default: 200000
        min: 1
        description: Number of reads sampled from SAM/BAM file, default = 200000.
      - name: "--map_qual"
        type: integer
        required: false
        default: 30
        description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
        min: 0

  - name: "Output"
    arguments:
      - name: "--output"
        type: file
        direction: output
        required: false
        default: $id.strandedness.txt
        description: output file (txt) of strandedness report

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam
  - path: /testData/unit_test_resources/sarscov2/test.bed12

# Container: stock Ubuntu plus pip, then RSeQC installed from PyPI.
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [ python3-pip ]
      - type: python
        packages: [ RSeQC ]

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,10 +0,0 @@
#!/bin/bash

# Runs RSeQC's infer_experiment.py on $par_input against the gene model
# $par_refgene and redirects its standard output to $par_output.

set -eo pipefail

# All expansions are quoted so paths containing spaces or glob characters are
# passed through as single arguments.
infer_experiment.py \
  -i "$par_input" \
  -r "$par_refgene" \
  -s "$par_sample_size" \
  -q "$par_map_qual" \
  > "$par_output"

View File

@@ -1,24 +0,0 @@
#!/bin/bash

# Smoke test: run the component on the bundled BAM and BED12 files and verify
# that a non-empty strandedness report is written.

# Fixtures live in $meta_resources_dir; the report goes to the working directory.
bam_path="$meta_resources_dir/test.paired_end.sorted.bam"
bed_path="$meta_resources_dir/test.bed12"
report="strandedness.txt"

echo "> Running $meta_functionality_name."
"$meta_executable" \
  --input "$bam_path" \
  --refgene "$bed_path" \
  --output "$report"
status=$?

if [[ $status != 0 ]]; then
  echo "Non zero exit code: $status"
  exit 1
fi

echo ">> Checking whether output can be found and has content"
if [ ! -f "$report" ]; then
  echo "$report is missing"
  exit 1
fi
if [ ! -s "$report" ]; then
  echo "$report is empty"
  exit 1
fi

exit 0

View File

@@ -1,117 +0,0 @@
# Viash component definition for rseqc/rseqc_innerdistance.
# script.sh is the entry point; test.sh runs it against the bundled BAM/BED12.
name: "rseqc_innerdistance"
namespace: "rseqc"

# Provenance of the nf-core module this component was migrated from.
info:
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/rseqc/innerdistance/main.nf]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33

description: |
  Calculate inner distance between read pairs.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--input"
        type: file
        required: true
        description: input alignment file in BAM or SAM format
      - name: "--refgene"
        type: file
        required: true
        description: Reference gene model in bed format
      - name: "--sample_size"
        type: integer
        required: false
        default: 200000
        min: 1
        description: Number of reads sampled from SAM/BAM file, default = 200000.
      - name: "--map_qual"
        type: integer
        required: false
        default: 30
        description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
        min: 0
      - name: "--lower_bound_size"
        type: integer
        required: false
        default: -250
        description: Lower bound of inner distance (bp). This option is used for plotting the histogram, default=-250.
      - name: "--upper_bound_size"
        type: integer
        required: false
        default: 250
        description: Upper bound of inner distance (bp). This option is used for plotting the histogram, default=250.
      - name: "--step_size"
        type: integer
        required: false
        default: 5
        description: Step size (bp) of the histogram. This option is used for plotting the histogram, default=5.

  # Every output is optional: script.sh only moves the files that were produced.
  - name: "Output"
    arguments:
      - name: "--output_stats"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.inner_distance.stats
        description: output file (txt) with summary statistics of inner distances of paired reads
      - name: "--output_dist"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.inner_distance.txt
        description: output file (txt) with inner distances of all paired reads
      - name: "--output_freq"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.inner_distance_freq.txt
        description: output file (txt) with frequencies of inner distances of all paired reads
      - name: "--output_plot"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.inner_distance_plot.pdf
        description: output file (pdf) with histogram plot of inner distances of all paired reads
      - name: "--output_plot_r"
        type: file
        direction: output
        required: false
        must_exist: false
        default: $id.inner_distance_plot.r
        description: output file (R) with script of histogram plot of inner distances of all paired reads

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam
  - path: /testData/unit_test_resources/sarscov2/test.bed12

# Container: stock Ubuntu plus pip and R, then RSeQC installed from PyPI.
engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [python3-pip, r-base]
      - type: python
        packages: [ RSeQC ]

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,23 +0,0 @@
#!/bin/bash

# Runs RSeQC's inner_distance.py and moves the files it writes under a private
# output prefix to the requested output locations. The first two lines of the
# tool's standard output are stored as the summary statistics.

set -exo pipefail

# Private scratch directory: keeps intermediate files (including the captured
# stdout) out of the working directory and is removed on exit.
tmpdir=$(mktemp -d "${meta_temp_dir:-/tmp}/${meta_functionality_name:-rseqc_innerdistance}-XXXXXXXX")
trap 'rm -rf "$tmpdir"' EXIT

prefix="$tmpdir/out"

inner_distance.py \
  -i "$par_input" \
  -r "$par_refgene" \
  -o "$prefix" \
  -k "$par_sample_size" \
  -l "$par_lower_bound_size" \
  -u "$par_upper_bound_size" \
  -s "$par_step_size" \
  -q "$par_map_qual" \
  > "$tmpdir/stdout.txt"

head -n 2 "$tmpdir/stdout.txt" > "$par_output_stats"

# Move "$1" to "$2" when the source exists and a destination was requested.
# Written as an `if` so that a missing optional file never becomes the exit
# status of the script (a trailing `[[ ... ]] && mv` would return 1).
move_if_present() {
  local src="$1" dest="$2"
  if [[ -f "$src" && -n "$dest" ]]; then
    mv "$src" "$dest"
  fi
}

move_if_present "$prefix.inner_distance.txt" "$par_output_dist"
move_if_present "$prefix.inner_distance_plot.pdf" "$par_output_plot"
move_if_present "$prefix.inner_distance_plot.r" "$par_output_plot_r"
move_if_present "$prefix.inner_distance_freq.txt" "$par_output_freq"

View File

@@ -1,43 +0,0 @@
#!/bin/bash

# Smoke test: run the component on the bundled paired-end BAM and BED12 files
# and verify that all five outputs are created and non-empty.
#
# Only the two fixtures declared as test resources are used; no other
# reference file needs to be unpacked.

# define input and output for script
input_bam="$meta_resources_dir/test.paired_end.sorted.bam"
input_bed="$meta_resources_dir/test.bed12"

output_stats="inner_distance_stats.txt"
output_dist="inner_distance.txt"
output_plot="inner_distance_plot.pdf"
output_plot_r="inner_distance_plot.r"
output_freq="inner_distance_freq.txt"

# Run executable
echo "> Running $meta_functionality_name"

"$meta_executable" \
  --input "$input_bam" \
  --refgene "$input_bed" \
  --output_stats "$output_stats" \
  --output_dist "$output_dist" \
  --output_plot "$output_plot" \
  --output_plot_r "$output_plot_r" \
  --output_freq "$output_freq"

exit_code=$?
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1

echo ">> asserting output has been created for paired read input"
[ ! -f "$output_stats" ] && echo "$output_stats was not created" && exit 1
[ ! -s "$output_stats" ] && echo "$output_stats is empty" && exit 1
[ ! -f "$output_dist" ] && echo "$output_dist was not created" && exit 1
[ ! -s "$output_dist" ] && echo "$output_dist is empty" && exit 1
[ ! -f "$output_plot" ] && echo "$output_plot was not created" && exit 1
[ ! -s "$output_plot" ] && echo "$output_plot is empty" && exit 1
[ ! -f "$output_plot_r" ] && echo "$output_plot_r was not created" && exit 1
[ ! -s "$output_plot_r" ] && echo "$output_plot_r is empty" && exit 1
[ ! -f "$output_freq" ] && echo "$output_freq was not created" && exit 1
[ ! -s "$output_freq" ] && echo "$output_freq is empty" && exit 1

exit 0

View File

@@ -39,3 +39,4 @@ else
fi
mv rRNA_reads.log $par_sortmerna_log

View File

@@ -38,3 +38,4 @@ echo ">> Checking if the correct files are present"
echo ">>> Test finished successfully"
exit 0

View File

@@ -1,309 +0,0 @@
name: trimgalore
description: |
A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files.
keywords: ["trimming", "adapters"]
links:
homepage: https://github.com/FelixKrueger/TrimGalore
documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
repository: https://github.com/FelixKrueger/TrimGalore
license: GPL-3.0
requirements:
commands: [trim_galore]
argument_groups:
- name: Input
arguments:
- name: "--input"
type: file
description: Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz
required: true
multiple: true
example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq
- name: Trimming options
arguments:
- name: --quality
alternatives: -q
type: integer
description: Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal).
example: 20
required: false
- name: --phred33
type: boolean
description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.
required: false
- name: --phred64
type: boolean
description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.
required: false
- name: --fastqc
type: boolean
description: Run FastQC in the default mode on the FastQ file once trimming is complete.
required: false
- name: --fastqc_args
type: string
description: Passes extra arguments to FastQC. If more than one argument is to be passed to FastQC they must be in the form "arg1 arg2 ...". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.
required: false
example: "--nogroup --outdir /home/"
# Adapter for read 1 (or single-end reads). A single `example` key is kept:
# the value is an adapter sequence, not a number.
- name: --adapter
  alternatives: -a
  type: string
  description: |
    Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA.
    At a special request, multiple adapters can also be specified like so:
    -a " AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT" -a2 " AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT",
    or so:
    -a "file:../multiple_adapters.fa" -a2 "file:../different_adapters.fa"
    Potentially in conjunction with the parameter "-n 3" to trim all adapters.
  required: false
  example: AGCTCCCG
- name: --adapter2
alternatives: -a2
type: string
description: Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires '--paired' to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA.
required: false
example: AGCTCCCG
- name: --illumina
type: boolean
description: Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.
required: false
- name: --stranded_illumina
type: boolean
description: Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
required: false
- name: --nextera
type: boolean
description: Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
required: false
# Optional flag, like the other adapter presets (--illumina, --nextera, ...).
- name: --small_rna
  type: boolean
  description: Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then -a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT) unless -a2 had been defined explicitly.
  required: false
- name: --consider_already_trimmed
type: integer
description: During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to '-a X'). Quality trimming is still performed as usual.
required: false
- name: --max_length
type: integer
description: Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences.
required: false
- name: --stringency
type: integer
description: Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3' end of any read.
required: false
example: 1
- name: --error_rate
alternatives: -e
type: double
description: Maximum allowed error rate (no. of errors divided by the length of the matching region)
required: false
example: 0.1
- name: --gzip
type: boolean
description: Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly.
required: false
- name: --dont_gzip
type: boolean
description: Output files won't be compressed with GZIP. This option overrides --gzip.
required: false
- name: --length
type: integer
description: Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of '0' effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option.
required: false
example: 20
- name: --max_n
type: integer
description: The total number of Ns a read may contain before it will be removed altogether.In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length.
required: false
- name: --trim_n
type: boolean
description: Removes Ns from either side of the read. This option does currently not work in RRBS mode.
required: false
- name: --no_report_file
type: boolean
description: If specified no report file will be generated.
required: false
- name: --suppress_warn
type: boolean
description: If specified any output to STDOUT or STDERR will be suppressed.
required: false
- name: --clip_R1
type: integer
description: Instructs TrimGalore to remove given number of bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.
required: false
- name: --clip_R2
type: integer
description: Instructs TrimGalore to remove given number bp from the 5' end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation.
required: false
- name: --three_prime_clip_R1
type: integer
description: Instructs Trim Galore to remove spacified number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3' end that is not directly related to adapter sequence or basecall quality.
required: false
- name: --three_prime_clip_R2
type: integer
description: Instructs Trim Galore to remove <int> bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
required: false
- name: --nextseq
type: integer
description: This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exlusive with '-q INT'.
required: false
- name: --basename
type: string
description: Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
required: false
- name: --cores
alternatives: -j
type: integer
description: Number of cores to be used for trimming
required: false
example: 1
- name: Specific trimming options without adapter/quality trimming
arguments:
- name: --hardtrim5
type: integer
description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to <int> bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .<int>_5prime.fq(.gz).
required: false
- name: --hardtrim3
type: integer
description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to <int> bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .<int>_3prime.fq(.gz).
required: false
- name: --clock
type: boolean
description: In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock.
required: false
- name: --polyA
type: boolean
description: This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start ("32:A:") and end ("_PolyA:32") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming.
required: false
- name: --implicon
type: boolean
description: |
This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In it's current implementation, the UMI carrying reads come in the following format
Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'
Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'
Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.
required: false
- name: RRBS-specific options
arguments:
- name: --rrbs
type: boolean
description: Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below).
required: false
- name: --non_directional
type: boolean
description: Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. Note that this option does not set '--clip_r2 2' in paired-end mode.
required: false
- name: --keep
type: boolean
description: Keep the quality trimmed intermediate file.
required: false
- name: Paired-end specific options
arguments:
- name: --paired
type: boolean
description: This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
required: false
- name: --retain_unpaired
type: boolean
description: If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2.
required: false
- name: --length_1
alternatives: -r1
type: integer
description: Unpaired single-end read length cutoff needed for read 1 to be written to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
example: 35
required: false
- name: --length_2
alternatives: -r2
type: integer
description: Unpaired single-end read length cutoff needed for read 2 to be written to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
required: false
example: 35
- name: Output
arguments:
- name: --output_dir
alternatives: -o
type: file
description: If specified all output will be written to this directory instead of the current directory.
direction: output
required: false
default: trimmed_output
- name: --trimmed_r1
type: file
required: false
description: Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: read_1.fastq.gz
- name: --trimmed_r2
type: file
required: false
description: Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: read_2.fastq.gz
- name: --trimming_report_r1
type: file
required: false
description: Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: read_1.trimming_report.txt
# Report for the second mate; script.sh moves it only in paired-end mode.
- name: --trimming_report_r2
  type: file
  description: Trimming report for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
  direction: output
  required: false
  example: read_2.trimming_report.txt
- name: --trimmed_fastqc_html_1
type: file
required: false
description: FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: read_1.fastqc.html
- name: --trimmed_fastqc_html_2
type: file
description: FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
required: false
example: read_2.fastqc.html
- name: --trimmed_fastqc_zip_1
type: file
required: false
description: FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: read_1.fastqc.zip
- name: --trimmed_fastqc_zip_2
type: file
description: FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
required: false
example: read_2.fastqc.zip
# script.sh moves this file only when --paired and --retain_unpaired are both true.
- name: --unpaired_r1
  type: file
  required: false
  description: Output file for unpaired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
  direction: output
  example: unpaired_read_1.fastq
- name: --unpaired_r2
type: file
required: false
description: Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
direction: output
example: unpaired_read_2.fastq
resources:
- type: bash_script
path: script.sh
test_resources:
- type: bash_script
path: test.sh
engines:
- type: docker
image: quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0
setup:
- type: docker
run: |
echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt
runners:
- type: executable
- type: nextflow

View File

@@ -1,111 +0,0 @@
#!/bin/bash

# Wrapper around trim_galore.
#
# Builds the trim_galore command line from the par_* environment variables,
# writes all results to $par_output_dir and then moves the individual result
# files to the explicitly requested output paths (par_trimmed_r1, ...).
# $par_input is a ";"-separated list of FastQ files.

set -eo pipefail

# mkdir -p is a no-op when the directory already exists.
mkdir -p "$par_output_dir"

IFS=";" read -ra input <<< "$par_input"

# Boolean parameters are compared against the string "false" below; unsetting
# the false ones lets the ${var:+--flag} expansions emit a flag only when the
# option is enabled. Every parameter declared as boolean must be listed here,
# otherwise a "false" value would still switch the flag on.
unset_if_false=(
  par_phred33
  par_phred64
  par_fastqc
  par_illumina
  par_stranded_illumina
  par_nextera
  par_small_rna
  par_gzip
  par_dont_gzip
  par_trim_n
  par_no_report_file
  par_suppress_warn
  par_clock
  par_polyA
  par_implicon
  par_rrbs
  par_non_directional
  par_keep
  par_paired
  par_retain_unpaired
)

for par in "${unset_if_false[@]}"; do
  test_val="${!par}"
  if [[ "$test_val" == "false" ]]; then
    unset "$par"
  fi
done

# Boolean options are passed as bare flags; valued options get their value as
# a separate, quoted argument.
trim_galore \
  ${par_quality:+-q "${par_quality}"} \
  ${par_phred33:+--phred33} \
  ${par_phred64:+--phred64} \
  ${par_fastqc:+--fastqc} \
  ${par_fastqc_args:+--fastqc_args "${par_fastqc_args}"} \
  ${par_adapter:+-a "${par_adapter}"} \
  ${par_adapter2:+-a2 "${par_adapter2}"} \
  ${par_illumina:+--illumina} \
  ${par_stranded_illumina:+--stranded_illumina} \
  ${par_nextera:+--nextera} \
  ${par_small_rna:+--small_rna} \
  ${par_consider_already_trimmed:+--consider_already_trimmed "${par_consider_already_trimmed}"} \
  ${par_max_length:+--max_length "${par_max_length}"} \
  ${par_stringency:+--stringency "${par_stringency}"} \
  ${par_error_rate:+-e "${par_error_rate}"} \
  ${par_gzip:+--gzip} \
  ${par_dont_gzip:+--dont_gzip} \
  ${par_length:+--length "${par_length}"} \
  ${par_max_n:+--max_n "${par_max_n}"} \
  ${par_trim_n:+--trim-n} \
  ${par_no_report_file:+--no_report_file} \
  ${par_suppress_warn:+--suppress_warn} \
  ${par_clip_R1:+--clip_R1 "${par_clip_R1}"} \
  ${par_clip_R2:+--clip_R2 "${par_clip_R2}"} \
  ${par_three_prime_clip_R1:+--three_prime_clip_R1 "${par_three_prime_clip_R1}"} \
  ${par_three_prime_clip_R2:+--three_prime_clip_R2 "${par_three_prime_clip_R2}"} \
  ${par_nextseq:+--nextseq "${par_nextseq}"} \
  ${par_basename:+--basename "${par_basename}"} \
  ${par_hardtrim5:+--hardtrim5 "${par_hardtrim5}"} \
  ${par_hardtrim3:+--hardtrim3 "${par_hardtrim3}"} \
  ${par_clock:+--clock} \
  ${par_polyA:+--polyA} \
  ${par_implicon:+--implicon} \
  ${par_rrbs:+--rrbs} \
  ${par_non_directional:+--non_directional} \
  ${par_keep:+--keep} \
  ${par_paired:+--paired} \
  ${par_retain_unpaired:+--retain_unpaired} \
  ${par_length_1:+-r1 "${par_length_1}"} \
  ${par_length_2:+-r2 "${par_length_2}"} \
  ${par_cores:+-j "${par_cores}"} \
  -o "$par_output_dir" \
  "${input[@]}"

# Usage: move_to DEST SRC...
# Moves SRC to DEST when a destination was requested; does nothing otherwise.
# Written as an `if` so that a skipped optional output never becomes the exit
# status of the script. Globs are expanded by the caller; an unmatched glob
# still makes mv (and therefore the script) fail.
move_to() {
  local dest="$1"
  shift
  if [[ -n "$dest" ]]; then
    mv "$@" "$dest"
  fi
}

# par_paired / par_fastqc / par_retain_unpaired may have been unset above, so
# they are always quoted in the tests below.
if [ "$par_paired" == "true" ]; then
  input_r1=$(basename -- "${input[0]}")
  input_r2=$(basename -- "${input[1]}")

  move_to "$par_trimmed_r1" "$par_output_dir"/*val_1.f*q*
  move_to "$par_trimmed_r2" "$par_output_dir"/*val_2.f*q*
  move_to "$par_trimming_report_r1" "$par_output_dir/${input_r1}_trimming_report.txt"
  move_to "$par_trimming_report_r2" "$par_output_dir/${input_r2}_trimming_report.txt"

  if [ "$par_fastqc" == "true" ]; then
    move_to "$par_trimmed_fastqc_html_1" "$par_output_dir"/*val_1_fastqc.html
    move_to "$par_trimmed_fastqc_html_2" "$par_output_dir"/*val_2_fastqc.html
    move_to "$par_trimmed_fastqc_zip_1" "$par_output_dir"/*val_1_fastqc.zip
    move_to "$par_trimmed_fastqc_zip_2" "$par_output_dir"/*val_2_fastqc.zip
  fi

  if [ "$par_retain_unpaired" == "true" ]; then
    move_to "$par_unpaired_r1" "$par_output_dir"/*.unpaired_1.f*q*
    move_to "$par_unpaired_r2" "$par_output_dir"/*.unpaired_2.f*q*
  fi
else
  input_r1=$(basename -- "${input[0]}")

  move_to "$par_trimmed_r1" "$par_output_dir"/*_trimmed.fq*
  move_to "$par_trimming_report_r1" "$par_output_dir/${input_r1}_trimming_report.txt"

  if [ "$par_fastqc" == "true" ]; then
    move_to "$par_trimmed_fastqc_html_1" "$par_output_dir"/*_trimmed_fastqc.html
    move_to "$par_trimmed_fastqc_zip_1" "$par_output_dir"/*_trimmed_fastqc.zip
  fi
fi

View File

@@ -1,127 +0,0 @@
#!/bin/bash

# Abort on the first failing command, including failures inside pipelines.
set -eo pipefail

# helper functions
# Every assert_* helper prints a diagnostic and exits with status 1 when its
# check fails, and returns 0 when the check passes.

# assert_file_exists FILE: FILE must be a regular file.
assert_file_exists() {
  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
}

# assert_file_doesnt_exist FILE: FILE must not be a regular file.
assert_file_doesnt_exist() {
  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
}

# assert_file_empty FILE: FILE must be missing or have size zero.
assert_file_empty() {
  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
}

# assert_file_not_empty FILE: FILE must exist with a size greater than zero.
assert_file_not_empty() {
  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
}

# assert_file_contains FILE PATTERN: grep must find PATTERN in FILE.
assert_file_contains() {
  grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
}

# assert_file_not_contains FILE PATTERN: grep must not find PATTERN in FILE.
# Written as an explicit `if` so the function returns 0 when the pattern is
# absent; a `grep && {...}` list would return grep's non-zero status in that
# case and trip `set -e` in the caller. A FILE that grep cannot read is
# treated as not containing PATTERN.
assert_file_not_contains() {
  if grep -q "$2" "$1"; then
    echo "File '$1' contains '$2' but shouldn't"
    exit 1
  fi
}
#################################################################
echo ">>> Prepare test data"
# Write two small FASTQ fixtures into the working directory. Each file holds
# two records; the record IDs in example_R1.fastq (suffix /1) match those in
# example_R2.fastq (suffix /2), so the pair can be used as paired-end input.
# The quoted 'EOF' delimiter keeps the heredoc bodies literal (no expansion).

# Read 1 fixture.
cat > example_R1.fastq <<'EOF'
@SRR6357071.22842410 22842410/1 kraken:taxid|4932
CAAGTTTTCATCTTCAACAGCTGATTGACTTCTTTGTGGTATGCCTCGATATATTTTTCTTTTTCTTTAATATCTTTATTATAGGTGATTGCCTCATCGTA
+
BBBBBFFFFFFFFFFFFFFF/BFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFBF<
@SRR6357071.52260105 52260105/1 kraken:taxid|4932
TAGACTTACCAGTACCCTTTTCGACGGCGGAAACATTCAAAATACCGTTAGAGTCGACATCGAAAGTGACTTCAATTTGTGGGACACCTCTTGGAGCTGGT
+
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFFFFFFFFFFFFF
EOF
# Read 2 fixture.
cat > example_R2.fastq <<'EOF'
@SRR6357071.22842410 22842410/2 kraken:taxid|4932
CCGAGATCGAAGAAACGAATTCACCTGATTGCAGCTGTAAAAGCAGTAAAATCAATCAAACCAATACGGACAACCTTACGATACGATGAGGCAATCACCTA
+
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
@SRR6357071.52260105 52260105/2 kraken:taxid|4932
GTTGATTCCAAGAAACTCTACCATTCCAACTAAGAAATCCGAAGTTTTCTCTACTTATGCTGACAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTG
+
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
EOF
#################################################################
echo ">>> Testing for single-end reads"

# Run the component on the read 1 fixture only, with FastQC enabled and every
# optional output directed into a dedicated directory.
se_dir="output_se_test"
"$meta_executable" \
  --paired false \
  --input "example_R1.fastq" \
  --trimmed_fastqc_html_1 "$se_dir/example.trimmed.html" \
  --trimmed_fastqc_zip_1 "$se_dir/example.trimmed.zip" \
  --trimmed_r1 "$se_dir/example.trimmed.fastq" \
  --trimming_report_r1 "$se_dir/example.trimming_report.txt" \
  --fastqc true \
  --output_dir "$se_dir"

# Expected outputs, listed in the order in which they are verified.
se_outputs=(
  "$se_dir/example.trimmed.html"
  "$se_dir/example.trimmed.zip"
  "$se_dir/example.trimmed.fastq"
  "$se_dir/example.trimming_report.txt"
)

echo ">> Checking output"
for se_file in "${se_outputs[@]}"; do
  assert_file_exists "$se_file"
done

echo ">> Check if output is empty"
for se_file in "${se_outputs[@]}"; do
  assert_file_not_empty "$se_file"
done

echo ">> Check contents"
# The first fixture read must be present in the trimmed output, and the report
# must carry the single-end length-cutoff summary line.
assert_file_contains "$se_dir/example.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
assert_file_contains "$se_dir/example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff"
#################################################################
echo ">>> Testing for paired-end reads"

# Run the component on both fixtures as a pair, with FastQC enabled and every
# optional per-read output directed into a dedicated directory.
pe_dir="output_pe_test"
"$meta_executable" \
  --paired true \
  --input "example_R1.fastq;example_R2.fastq" \
  --trimmed_fastqc_html_1 "$pe_dir/example_R1.trimmed.html" \
  --trimmed_fastqc_html_2 "$pe_dir/example_R2.trimmed.html" \
  --trimmed_fastqc_zip_1 "$pe_dir/example_R1.trimmed.zip" \
  --trimmed_fastqc_zip_2 "$pe_dir/example_R2.trimmed.zip" \
  --trimmed_r1 "$pe_dir/example_R1.trimmed.fastq" \
  --trimmed_r2 "$pe_dir/example_R2.trimmed.fastq" \
  --trimming_report_r1 "$pe_dir/example_R1.trimming_report.txt" \
  --trimming_report_r2 "$pe_dir/example_R2.trimming_report.txt" \
  --fastqc true \
  --output_dir "$pe_dir"

# Build the list of expected outputs: for each output kind, read 1 then read 2.
pe_outputs=()
for pe_suffix in trimmed.html trimmed.zip trimmed.fastq trimming_report.txt; do
  for pe_read in R1 R2; do
    pe_outputs+=("$pe_dir/example_${pe_read}.${pe_suffix}")
  done
done

echo ">> Checking output"
for pe_file in "${pe_outputs[@]}"; do
  assert_file_exists "$pe_file"
done

echo ">> Check if output is empty"
for pe_file in "${pe_outputs[@]}"; do
  assert_file_not_empty "$pe_file"
done

echo ">> Check contents"
# Each trimmed file must still contain its mate of the first fixture pair, and
# the reports must carry the expected summary lines.
assert_file_contains "$pe_dir/example_R1.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
assert_file_contains "$pe_dir/example_R2.trimmed.fastq" "@SRR6357071.22842410 22842410/2"
assert_file_contains "$pe_dir/example_R1.trimming_report.txt" "sequences processed in total"
assert_file_contains "$pe_dir/example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff"

#################################################################

echo ">>> Test finished successfully"
exit 0

View File

@@ -137,5 +137,6 @@ if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene))
done <- lapply(params, write_se_table)
# Output session information and citations
citation("tximeta")
# Removed for now because the 'tximeta' package is not found sometimes
# citation("tximeta")
sessionInfo()

View File

@@ -1,61 +0,0 @@
# Viash component wrapping `umi_tools dedup` (see script.sh).
name: "umitools_dedup"
namespace: "umitools"
info:
  # Provenance of the nf-core module this component was migrated from.
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/umitools/dedup/main.nf, modules/nf-core/umitools/dedup/meta.yml]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
description: |
  Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--paired"
        type: boolean
        default: false
        description: Paired fastq files or not?
      - name: "--bam"
        type: file
        description: Input BAM file
      - name: "--bai"
        type: file
        description: BAM index
      # No default is declared here, so the script receives an empty value
      # when this argument is omitted.
      - name: "--get_output_stats"
        type: boolean
        description: Whether or not to generate output stats.

  - name: "Output"
    arguments:
      - name: "--output_bam"
        type: file
        description: Deduplicated BAM file
        direction: output
        default: $id.$key.bam
      - name: "--output_stats"
        type: file
        description: Directory containing UMI based deduplication statistics files
        direction: output
        default: $id.umi_dedup.stats

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/chr19.bam
  - path: /testData/unit_test_resources/chr19.bam.bai

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [pip]
      - type: python
        packages: [umi_tools]

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,21 +0,0 @@
#!/bin/bash

set -eo pipefail

# Deduplicate a BAM file with `umi_tools dedup`.
#
# Reads:  par_bam, par_paired, par_get_output_stats
# Writes: par_output_bam, and statistics under par_output_stats/ when
#         par_get_output_stats is "true".
#
# Boolean parameters are compared against the literal "true" instead of being
# executed as commands, so an unset or empty value counts as false.

# Options that always apply.
args=(--random-seed=100)

# Paired-end mode: unpaired reads and chimeric pairs are discarded.
paired=()
if [ "$par_paired" == "true" ]; then
  paired=(--paired)
  args+=(--unpaired-reads=discard --chimeric-pairs=discard)
fi

# Optional statistics; the trailing slash makes umi_tools write its stats
# files with the directory as their prefix.
stats=()
if [ "$par_get_output_stats" == "true" ]; then
  mkdir -p "$par_output_stats"
  stats=(--output-stats "$par_output_stats/")
fi

# PYTHONHASHSEED is pinned alongside --random-seed.
PYTHONHASHSEED=0 umi_tools dedup \
  -I "$par_bam" \
  -S "$par_output_bam" \
  "${stats[@]}" "${paired[@]}" "${args[@]}"

View File

@@ -1,20 +0,0 @@
#!/bin/bash

echo ">>> Testing $meta_functionality_name"

# Single-end run on the chr19 fixture with statistics output requested.
"$meta_executable" \
  --paired false \
  --bam $meta_resources_dir/chr19.bam \
  --bai $meta_resources_dir/chr19.bam.bai \
  --get_output_stats true \
  --output_bam chr19.deduped.bam \
  --output_stats chr19.umi_dedup.stats

echo ">>> Checking whether output exists"

# The deduplicated BAM must exist and have content.
if [ ! -f "chr19.deduped.bam" ]; then
  echo "File 'chr19.deduped.bam' does not exist!"
  exit 1
fi
if [ ! -s "chr19.deduped.bam" ]; then
  echo "File 'chr19.deduped.bam' is empty!"
  exit 1
fi

# The statistics directory must exist and hold at least one entry.
if [ ! -d "chr19.umi_dedup.stats" ]; then
  echo "Directory 'chr19.umi_dedup.stats' does not exist!"
  exit 1
fi
if [ -z "$(ls -A 'chr19.umi_dedup.stats')" ]; then
  echo "Directory 'chr19.umi_dedup.stats' is empty!"
  exit 1
fi

echo "All tests succeeded!"
exit 0

View File

@@ -1,93 +0,0 @@
# Viash component wrapping `umi_tools extract` (see script.sh).
name: "umitools_extract"
namespace: "umitools"
info:
  # Provenance of the nf-core module this component was migrated from.
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths: [modules/nf-core/umitools/extract/main.nf, modules/nf-core/umitools/extract/meta.yml]
    last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
description: |
  UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.
  This component flexibly removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.
  This component extracts the UMI barcode from a read and adds it to the read name, leaving any sample barcode in place.

argument_groups:
  - name: "Input"
    arguments:
      - name: "--paired"
        type: boolean
        required: false
        default: false
        description: Paired fastq files or not?
      - name: "--input"
        type: file
        required: true
        multiple: true
        multiple_sep: ","
        description: Input fastq files, either one or two (paired)
        example: sample.fastq
      # One pattern per input file; script.sh splits this value on ",".
      - name: "--bc_pattern"
        type: string
        description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI."
        multiple: true
        multiple_sep: ","

  - name: "Output"
    arguments:
      - name: "--fastq_1"
        type: file
        required: true
        description: Output file for read 1.
        direction: output
        default: $id.$key.read_1.fastq
      - name: "--fastq_2"
        type: file
        required: false
        must_exist: false
        description: Output file for read 2.
        direction: output
        default: $id.$key.read_2.fastq

  - name: "Optional arguments"
    arguments:
      - name: "--umitools_extract_method"
        type: "string"
        description: UMI pattern to use.
        default: string
        choices: [string, regex]
      - name: "--umitools_umi_separator"
        type: string
        default: "_"
        description: The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.
      # NOTE(review): script.sh does not appear to read this argument - confirm
      # whether it is still needed by callers before relying on it.
      - name: "--umitools_grouping_method"
        type: string
        description: Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.
        default: "directional"
        choices: ["unique", "percentile", "cluster", "adjacency", "directional"]
      - name: "--umi_discard_read"
        type: integer
        description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.
        choices: [0, 1, 2]
        default: 0

resources:
  - type: bash_script
    path: script.sh

test_resources:
  - type: bash_script
    path: test.sh
  - path: /testData/unit_test_resources/scrb_seq_fastq.1.gz
  - path: /testData/unit_test_resources/scrb_seq_fastq.2.gz
  - path: /testData/unit_test_resources/slim.fastq.gz

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages: [pip]
      - type: python
        packages: [umi_tools]

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,59 +0,0 @@
#!/bin/bash

set -eo pipefail

# Extract UMIs from single-end or paired-end FASTQ files with
# `umi_tools extract` and copy the result(s) to the requested output paths.
#
# Reads:  par_paired, par_input (comma-separated), par_bc_pattern
#         (comma-separated), par_umitools_extract_method,
#         par_umitools_umi_separator, par_umi_discard_read
# Writes: par_fastq_1, and par_fastq_2 for paired input when no read is
#         discarded.
# Exits 1 when the number of inputs/patterns does not match the mode.

# Create the scratch directory first so the cleanup trap always has a valid
# path to remove.
tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")
function clean_up {
  rm -rf "$tmpdir"
}
trap clean_up EXIT

IFS="," read -ra input <<< "$par_input"
IFS="," read -ra pattern <<< "$par_bc_pattern"

read_count="${#input[@]}"
pattern_count="${#pattern[@]}"

if [ "$par_paired" == "true" ]; then
  echo "Paired - Reads: $read_count bc_patterns: $pattern_count"

  if [ "$read_count" -ne 2 ] || [ "$pattern_count" -ne 2 ]; then
    echo "Paired end input requires two read files and two UMI patterns"
    exit 1
  fi

  # Prefix the scratch names so two inputs sharing a basename cannot
  # overwrite each other; the original suffix is kept unchanged in case
  # umi_tools derives the output format from it.
  read1="r1_$(basename -- "${input[0]}")"
  read2="r2_$(basename -- "${input[1]}")"

  umi_tools extract \
    -I "${input[0]}" --read2-in="${input[1]}" \
    -S "$tmpdir/$read1" \
    --read2-out="$tmpdir/$read2" \
    --extract-method "$par_umitools_extract_method" \
    --bc-pattern "${pattern[0]}" \
    --bc-pattern2 "${pattern[1]}" \
    --umi-separator "$par_umitools_umi_separator"

  case "$par_umi_discard_read" in
    1)
      # discard read 1: the surviving read 2 becomes the single output
      cp "$tmpdir/$read2" "$par_fastq_1"
      ;;
    2)
      # discard read 2: only read 1 is kept
      cp "$tmpdir/$read1" "$par_fastq_1"
      ;;
    *)
      # keep both reads
      cp "$tmpdir/$read1" "$par_fastq_1"
      cp "$tmpdir/$read2" "$par_fastq_2"
      ;;
  esac
else
  echo "Not Paired - $read_count"

  if [ "$read_count" -ne 1 ] || [ "$pattern_count" -ne 1 ]; then
    echo "Single end input requires one read file and one UMI pattern"
    exit 1
  fi

  read1="$(basename -- "${input[0]}")"

  umi_tools extract \
    -I "${input[0]}" -S "$tmpdir/$read1" \
    --extract-method "$par_umitools_extract_method" \
    --bc-pattern "${pattern[0]}" \
    --umi-separator "$par_umitools_umi_separator"

  cp "$tmpdir/$read1" "$par_fastq_1"
fi

View File

@@ -1,56 +0,0 @@
#!/bin/bash

# Exercises the component in three modes: paired-end keeping both reads,
# paired-end discarding read 2, and single-end with a regex pattern.
# Each check prints a message and exits 1 on failure.

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz" \
  --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \
  --umitools_extract_method string \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 0 \
  --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz \
  --fastq_2 scrb_seq_fastq.2.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [[ ! -f scrb_seq_fastq.1.umi_extract.fastq.gz ]] || [[ ! -f scrb_seq_fastq.2.umi_extract.fastq.gz ]]; then
  echo "Reads file missing"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is empty"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.2.umi_extract.fastq.gz" ]; then
  echo "Read 2 file is empty"
  exit 1
fi

# Remove the outputs so the next run cannot pass on stale files.
rm scrb_seq_fastq.1.umi_extract.fastq.gz scrb_seq_fastq.2.umi_extract.fastq.gz

echo ">>> Testing for paired-end reads with umi_discard_reads option"
# The last argument line must not end in a backslash, otherwise the echo
# below would be handed to the component as extra arguments.
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz" \
  --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \
  --umitools_extract_method string \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 2 \
  --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is missing"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is empty"
  exit 1
fi
if [ -f "scrb_seq_fastq.2.umi_extract.fastq.gz" ]; then
  echo "Read 2 is not discarded"
  exit 1
fi

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --paired false \
  --input "$meta_resources_dir/slim.fastq.gz" \
  --bc_pattern "^(?P<umi_1>.{3}).{4}(?P<umi_2>.{2})" \
  --umitools_extract_method regex \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 0 \
  --fastq_1 slim.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "slim.umi_extract.fastq.gz" ]; then
  echo "Trimmed reads file missing"
  exit 1
fi
if [ ! -s "slim.umi_extract.fastq.gz" ]; then
  echo "Trimmed reads file is empty"
  exit 1
fi

echo ">>> Test finished successfully"
exit 0

View File

@@ -1,42 +0,0 @@
# Viash component that runs the bundled prepare-for-rsem.py helper on a BAM
# file (see script.sh).
name: "umitools_prepareforquant"
info:
  # Provenance of the nf-core module this component was migrated from.
  migration_info:
    git_repo: https://github.com/nf-core/rnaseq.git
    paths:
      - modules/local/umitools_prepareforrsem.nf
    last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d
description: Fix paired-end reads in name sorted BAM file to prepare for salmon quantification

argument_groups:
  - name: "Input"
    arguments:
      - name: "--bam"
        type: file

  - name: "Output"
    arguments:
      - name: "--output"
        type: file
        direction: output
        default: $id.transcriptome_sorted.bam
      - name: "--log"
        type: file
        direction: output
        default: $id.$key.log

resources:
  - type: bash_script
    path: script.sh
  # Helper script taken from
  # https://github.com/nf-core/rnaseq/blob/3.12.0/bin/prepare-for-rsem.py
  - path: prepare-for-rsem.py

engines:
  - type: docker
    image: ubuntu:22.04
    setup:
      - type: apt
        packages:
          - pip
      - type: python
        packages:
          - umi_tools
          - pysam

runners:
  - type: executable
  - type: nextflow

View File

@@ -1,8 +0,0 @@
#!/bin/bash

set -eo pipefail

# Run the bundled prepare-for-rsem.py helper: it reads the BAM at $par_bam,
# writes the result to $par_output and its log to $par_log.
# All paths are quoted so values containing spaces or glob characters are
# passed through as single arguments.
python3 "$meta_resources_dir/prepare-for-rsem.py" \
  --stdin="$par_bam" \
  --stdout="$par_output" \
  --log="$par_log"

View File

@@ -4,7 +4,7 @@ description: |
A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.
argument_groups:
- name: "Input"
- name: "Input"
arguments:
- name: "--id"
required: true
@@ -84,7 +84,7 @@ argument_groups:
type: file
description: Path to directory for pre-built Salmon index.
- name: "Output"
- name: "Output"
arguments:
- name: "--star_multiqc"
type: file
@@ -190,16 +190,14 @@ dependencies:
repository: biobox
- name: samtools/samtools_idxstats
repository: biobox
- name: umitools/umitools_dedup
# - name: umi_tools/umi_tools_dedup
# repository: biobox
- name: umitools_prepareforquant
# - name: umi_tools/umi_tools_prepareforquant
# repository: biobox
- name: umi_tools/umi_tools_dedup
repository: biobox
- name: umi_tools/umi_tools_prepareforrsem
repository: biobox
- name: salmon/salmon_quant
repository: biobox
- name: rsem/rsem_calculate_expression
# repository: biobox
repository: biobox
runners:
- type: executable

View File

@@ -89,15 +89,16 @@ workflow run_wf {
//
// Deduplicate genome BAM file
| umitools_dedup.run (
| umi_tools_dedup.run (
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"paired": "paired",
"bam": "genome_bam_sorted",
"bai": "genome_bam_index",
"get_output_stats": "umi_dedup_stats"
],
toState: [ "genome_bam_sorted": "output_bam" ],
fromState: { id, state ->
  // Only request deduplication statistics when enabled. The ternary must be
  // closed on this line: with a dangling ':' the argument map below becomes
  // its else-branch and the closure no longer returns the arguments.
  // Presumably the sample id serves as the stats prefix/name for
  // umi_tools_dedup - confirm against the biobox component.
  def output_stats = state.umi_dedup_stats ? state.id : null
  // NOTE(review): the previous mapping passed genome_bam_sorted as the input
  // BAM - confirm that genome_bam is the intended (sorted, indexed) file.
  [ paired: state.paired,
    input: state.genome_bam,
    bai: state.genome_bam_index,
    output_stats: output_stats ]
},
toState: [ "genome_bam_sorted": "output" ],
key: "genome_deduped"
)
| samtools_index.run (
@@ -185,15 +186,16 @@ workflow run_wf {
key: "transcriptome_idxstats"
)
| umitools_dedup.run (
| umi_tools_dedup.run (
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
fromState: [
"paired": "paired",
"bam": "transcriptome_bam",
"bai": "transcriptome_bam_index",
"get_output_stats": "umi_dedup_stats",
],
toState: [ "transcriptome_bam_deduped": "output_bam" ],
fromState: { id, state ->
  // Only request deduplication statistics when enabled. The ternary must be
  // closed on this line: with a dangling ':' the argument map below becomes
  // its else-branch and the closure no longer returns the arguments.
  // Presumably the sample id serves as the stats prefix/name for
  // umi_tools_dedup - confirm against the biobox component.
  def output_stats = state.umi_dedup_stats ? state.id : null
  [ paired: state.paired,
    input: state.transcriptome_bam,
    bai: state.transcriptome_bam_index,
    output_stats: output_stats ]
},
toState: [ "transcriptome_bam_deduped": "output" ],
key: "transcriptome_deduped"
)
| samtools_sort.run (
@@ -240,9 +242,9 @@ workflow run_wf {
)
// Fix paired-end reads in name sorted BAM file
| umitools_prepareforquant.run (
| umi_tools_prepareforrsem.run (
runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' },
fromState: [ "bam": "transcriptome_bam" ],
fromState: [ "input": "transcriptome_bam" ],
toState: [ "transcriptome_bam": "output" ]
)
@@ -294,7 +296,20 @@ workflow run_wf {
"paired": "paired",
"input": "input",
"index": "rsem_index",
"extra_args": "extra_rsem_calculate_expression_args"
"counts_gene": "rsem_counts_gene",
"counts_transcripts": "rsem_counts_transcripts",
"stat": "rsem_multiqc",
"logs": "star_multiqc",
"bam_star": "bam_star_rsem",
"bam_genome": "bam_genome_rsem",
"bam_transcript": "bam_transcript_rsem"
],
args: [
star: true,
star_output_genome_bam: true,
star_gzipped_read_file: true,
estimate_rspd: true,
seed: 1
],
toState: [
"rsem_counts_gene": "counts_gene",

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# v;iash ns build --setup cb --parallel
viash ns build --setup cb --parallel
# Split error message from standard output
# viash ns list > /dev/null
@@ -37,7 +37,6 @@ nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
--rsem_index testData/minimal_test/reference/rsem_index \
--aligner star_rsem \
--extra_rsem_calculate_expression_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
-profile docker \
-resume

View File

@@ -50,10 +50,6 @@ argument_groups:
- name: "--stringtie_ignore_gtf"
type: boolean
description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.
- name: "--extra_bedtools_args"
type: string
default: ''
description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.
- name: "--bam_csi_index"
type: boolean
default: false
@@ -151,9 +147,8 @@ dependencies:
- name: samtools/samtools_idxstats
repository: biobox
- name: stringtie
# - name: bedtools/bedtools_genomecov
# repository: biobox
- name: bedtools_genomecov
- name: bedtools/bedtools_genomecov
repository: biobox
- name: ucsc/bedclip
- name: ucsc/bedgraphtobigwig

View File

@@ -84,14 +84,31 @@ workflow run_wf {
| bedtools_genomecov.run (
runIf: { id, state -> !state.skip_bigwig },
fromState: [
"strandedness": "strandedness",
"bam": "processed_genome_bam",
"extra_bedtools_args": "extra_bedtools_args"
"input_bam": "processed_genome_bam",
],
toState: [
"bedgraph_forward": "bedgraph_forward",
"bedgraph_reverse": "bedgraph_reverse"
]
args: [
split: true,
du: true,
bed_graph: true,
strand: "+"
],
toState: [ "bedgraph_forward": "output" ],
key: "bedtools_genomecov_forward"
)
| bedtools_genomecov.run (
runIf: { id, state -> !state.skip_bigwig },
fromState: [
"input_bam": "processed_genome_bam",
],
args: [
split: true,
du: true,
bed_graph: true,
strand: "-"
],
toState: [ "bedgraph_reverse": "output" ],
key: "bedtools_genomecov_reverse"
)
| bedclip.run (

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# viash ns build --setup cb --parallel
viash ns build --setup cb --parallel
nextflow run target/nextflow/workflows/post_processing/main.nf \
--publish_dir "testData/paired_end_test" \
@@ -14,8 +14,6 @@ nextflow run target/nextflow/workflows/post_processing/main.nf \
--chrom_sizes "testData/test_output/reference_genome.fasta.sizes" \
--star_multiqc "testData/paired_end_test/SRR6357070.star_align.log" \
--extra_picard_args "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" \
--extra_bedtools_args "-split -du" \
--extra_featurecounts_args "-B -C" \
--gencode false \
--biotype gene_biotype \
-profile docker \

View File

@@ -29,9 +29,6 @@ argument_groups:
- name: "--bbsplit_index"
type: file
description: BBsplit index
- name: "--bbsplit_fasta_list"
type: file
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
- name: "--ribo_database_manifest"
type: file
description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.
@@ -110,12 +107,6 @@ argument_groups:
description: Specify the trimming tool to use.
choices: [ "trimgalore", "fastp"]
default: "trimgalore"
- name: "--extra_trimgalore_args"
type: string
description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.
# - name: "--extra_fastp_args"
# type: string
# description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.
- name: "--min_trimmed_reads"
type: integer
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
@@ -129,30 +120,14 @@ argument_groups:
description: Save the trimmed FastQ files in the results directory.
default: false
- name: "Alignment options"
arguments:
- name: "--extra_salmon_quant_args"
type: string
default: ''
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
- name: "Read filtering options"
arguments:
- name: "--skip_bbsplit"
type: boolean_true
description: Skip BBSplit for removal of non-reference genome reads.
# default: true
- name: "--remove_ribo_rna"
type: boolean_true
description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA.
# default: false
- name: "Other options"
arguments:
- name: "--extra_fq_subsample_args"
type: string
default: '--record-count 1000000 --seed 1'
description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.
- name: "Output"
arguments:
@@ -162,78 +137,78 @@ argument_groups:
required: false
must_exist: false
description: Path to output directory
default: $id.read_1.fastq
default: ${id}_r1.fastq.gz
- name: "--qc_output2"
type: file
direction: output
required: false
must_exist: false
description: Path to output directory
default: $id.read_2.fastq
default: ${id}_r2.fastq.gz
- name: "--fastqc_html_1"
type: file
direction: output
description: FastQC HTML report for read 1.
required: false
must_exist: false
default: $id.read_1.fastqc.html
default: ${id}_r1.fastqc.html
- name: "--fastqc_html_2"
type: file
direction: output
description: FastQC HTML report for read 2.
required: false
must_exist: false
default: $id.read_2.fastqc.html
default: ${id}_r2.fastqc.html
- name: "--fastqc_zip_1"
type: file
direction: output
description: FastQC report archive for read 1.
required: false
must_exist: false
default: $id.read_1.fastqc.zip
default: ${id}_r1.fastqc.zip
- name: "--fastqc_zip_2"
type: file
direction: output
description: FastQC report archive for read 2.
required: false
must_exist: false
default: $id.read_2.fastqc.zip
default: ${id}_r2.fastqc.zip
- name: "--trim_log_1"
type: file
direction: output
required: false
must_exist: false
default: $id.read_1.trimming_report.txt
default: ${id}_r1.trimming_report.txt
- name: "--trim_log_2"
type: file
direction: output
required: false
must_exist: false
default: $id.read_2.trimming_report.txt
default: ${id}_r2.trimming_report.txt
- name: "--trim_html_1"
type: file
direction: output
required: false
must_exist: false
default: $id.read_1.trimmed_fastqc.html
default: ${id}_r1.trimmed_fastqc.html
- name: "--trim_html_2"
type: file
direction: output
required: false
must_exist: false
default: $id.read_2.trimmed_fastqc.html
default: ${id}_r2.trimmed_fastqc.html
- name: "--trim_zip_1"
type: file
direction: output
required: false
must_exist: false
default: $id.read_1.trimmed_fastqc.zip
default: ${id}_r1.trimmed_fastqc.zip
- name: "--trim_zip_2"
type: file
direction: output
required: false
must_exist: false
default: $id.read_2.trimmed_fastqc.zip
default: ${id}_r2.trimmed_fastqc.zip
- name: "--sortmerna_log"
type: file
direction: output
@@ -267,20 +242,19 @@ resources:
dependencies:
- name: fastqc
# repository: biobox
- name: umitools/umitools_extract
repository: biobox
- name: umi_tools/umi_tools_extract
repository: biobox
- name: trimgalore
# repository: biobox
- name: bbmap_bbsplit
# repository: biobox
repository: biobox
- name: bbmap/bbmap_bbsplit
repository: biobox
- name: sortmerna
# repository: biobox
repository: biobox
- name: fastp
repository: biobox
- name: fq_subsample
# repository: biobox
repository: biobox
- name: salmon/salmon_quant
repository: biobox

View File

@@ -12,48 +12,58 @@ workflow run_wf {
[ id, state + [paired: paired, input: input] ]
}
// Perform QC on input fastq files
| fastqc.run (
runIf: { id, state -> !state.skip_qc && !state.skip_fastqc },
fromState: { id, state ->
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
[ paired: state.paired,
input: input ]
},
toState: [
"fastqc_html_1": "fastqc_html_1",
"fastqc_html_2": "fastqc_html_2",
"fastqc_zip_1": "fastqc_zip_1",
"fastqc_zip_2": "fastqc_zip_2"
fromState: [ "input": "input" ],
toState: {id, output_state, state ->
def newKeys = [
"fastqc_html_1":output_state["html"][0],
"fastqc_html_2": output_state["html"][1],
"fastqc_zip_1": output_state["zip"][0],
"fastqc_zip_2": output_state["zip"][1]
]
def new_state = state + newKeys
return new_state
},
args: [html: "*.html", zip: "*.zip"]
)
// Extract UMIs from fastq files and discard read 1 or read 2 if required
| umitools_extract.run (
| umi_tools_extract.run (
runIf: { id, state -> state.with_umi && !state.skip_umi_extract },
fromState: { id, state ->
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
def bc_pattern = state.paired ? [ state.umitools_bc_pattern, state.umitools_bc_pattern2 ] : [ state.umitools_bc_pattern ]
[ paired: state.paired,
input: input,
bc_pattern: bc_pattern,
umi_discard_read: state.umi_discard_read ]
def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2)
def output = "${id}.r1.fastq.gz"
def read2_out = state.paired ? "${id}.r2.fastq.gz" : state.remove(state.fastq_2)
[ input: state.fastq_1,
read2_in: state.fastq_2,
bc_pattern: state.umitools_bc_pattern,
bc_pattern2: bc_pattern2,
extract_method: state.umitools_extract_method,
umi_separator: state.umitools_umi_separator,
grouping_method: state.umitools_grouping_method,
output: output,
read2_out: read2_out ]
},
toState: [
"fastq_1": "fastq_1",
"fastq_2": "fastq_2"
"fastq_1": "output",
"fastq_2": "read2_out"
]
)
// Discard read if required
| map { id, state ->
def paired = state.paired
def fastq_1 = state.fastq_1
def fastq_2 = state.fastq_2
if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) {
if (state.umi_discard_read == 1) {
fastq_1 = fastq_2
}
fastq_2 = state.remove(state.fastq_2)
paired = false
}
[ id, state + [paired: paired, fastq_2: fastq_2] ]
[ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ]
}
// Trim reads using Trim galore!
@@ -63,8 +73,11 @@ workflow run_wf {
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
[ paired: state.paired,
input: input,
min_trimmed_reads: state.min_trimmed_reads ]
min_trimmed_reads: state.min_trimmed_reads,
trimmed_r1: state.qc_output1,
trimmed_r2: state.qc_output2 ]
},
args: [gzip: true, fastqc: true],
toState: [
"fastq_1": "trimmed_r1",
"fastq_2": "trimmed_r2",
@@ -74,21 +87,22 @@ workflow run_wf {
"trim_zip_2": "trimmed_fastqc_zip_2",
"trim_html_1": "trimmed_fastqc_html_1",
"trim_html_2": "trimmed_fastqc_html_2"
],
args: [gzip: true, fastqc: true]
]
)
// Trim reads using fastp
| fastp.run(
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
fromState: [
"in1": "fastq_1",
"in2": "fastq_2",
"merge": "fastp_save_merged",
"interleaved_in": "interleaved_reads",
"detect_adapter_for_pe": "fastp_pe_detect_adapter",
"adapter_fasta": "fastp_adapter_fasta"
],
fromState: { id, state ->
def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)]
[input_1: state.fastq_1, input_2: state.fastq_2] + outputState
[ in1: state.fastq_1,
in2: state.fastq_2,
merge: state.fastp_save_merged,
interleaved_in: state.interleaved_reads,
detect_adapter_for_pe: state.paired,
adapter_fasta: state.fastp_adapter_fasta ] + outputState
},
toState: [
"fastq_1": "out1",
"fastq_2": "out2",
@@ -102,19 +116,23 @@ workflow run_wf {
)
// Perform FASTQC on reads trimmed using fastp
| fastqc.run(
| fastqc.run (
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
fromState: { id, state ->
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
[ paired: state.paired,
input: input ]
[ input: input ]
},
toState: [
"trim_html_1": "fastqc_html_1",
"trim_html_2": "fastqc_html_2",
"trim_zip_1": "fastqc_zip_1",
"trim_zip_2": "fastqc_zip_2"
],
toState: {id, output_state, state ->
def newKeys = [
"trim_html_1":output_state["html"][0],
"trim_html_2": output_state["html"][1],
"trim_zip_1": output_state["zip"][0],
"trim_zip_2": output_state["zip"][1]
]
def new_state = state + newKeys
return new_state
},
args: [html: "*.html", zip: "*.zip"],
key: "fastqc_trimming"
)
@@ -125,7 +143,7 @@ workflow run_wf {
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
[ paired: state.paired,
input: input,
built_bbsplit_index: state.bbsplit_index ]
build: state.bbsplit_index ]
},
args: ["only_build_index": false],
toState: [
@@ -141,27 +159,44 @@ workflow run_wf {
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
def filePaths = state.ribo_database_manifest.readLines()
def refs = filePaths.collect { it }
[ paired: state.paired,
def other = "${id}_non_rRNA_reads/"
[ paired_in: state.paired,
input: input,
ribo_database_manifest: refs ]
ref: refs,
out2: state.paired,
other: other ]
},
toState: [
"fastq_1": "fastq_1",
"fastq_2": "fastq_2",
"sortmerna_log": "sortmerna_log"
args: [fastx: true, num_alignments: 1],
toState: { id, output_state, state ->
def newKeys = [
"sortmerna_output": output_state["other"],
"sortmerna_log": output_state["log"]
]
def new_state = state + newKeys
return new_state
}
)
| map { id, state ->
  // Samples processed without rRNA removal are forwarded untouched.
  if (!state.remove_ribo_rna) {
    return [ id, state ]
  }
  // Replace the working reads with the non-rRNA ("other") reads found in the
  // SortMeRNA output directory; `find` yields null when a file is not present.
  def outputs = state.sortmerna_output.listFiles()
  def reads = [
    fastq_1: outputs.find { it.name == "other_fwd.fq.gz" },
    fastq_2: outputs.find { it.name == "other_rev.fq.gz" }
  ]
  [ id, state + reads ]
}
// Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness
| fq_subsample.run (
runIf: { id, state -> state.strandedness == 'auto' },
fromState: { id, state ->
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
[
input: input,
extra_args: state.extra_fq_subsample_args
]
def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)]
[input_1: state.fastq_1, input_2: state.fastq_2] + outputState
},
args: [
record_count: 1000,
seed: 1
],
toState: [
"subsampled_fastq_1": "output_1",
"subsampled_fastq_2": "output_2"
@@ -187,6 +222,7 @@ workflow run_wf {
)
[ id, state + [lib_type: lib_type] ]
}
| salmon_quant.run (
runIf: { id, state -> state.strandedness == 'auto' },
fromState: { id, state ->
@@ -230,9 +266,6 @@ workflow run_wf {
"trim_html_1": "trim_html_1",
"trim_html_2": "trim_html_2",
"sortmerna_log": "sortmerna_log",
"failed_trim": "failed_trim",
"failed_trim_unpaired1": "failed_trim_unpaired1",
"failed_trim_unpaired2": "failed_trim_unpaired2",
"trim_json": "trim_json",
"trim_html": "trim_html",
"trim_merged_out": "trim_merged_out",

View File

@@ -1,6 +1,6 @@
#!/bin/bash
viash ns build --parallel --setup cb
viash ns build --parallel --setup cb #-q pre_processing
echo "> Preparing reference data files"
gunzip --keep testData/minimal_test/reference/genes.gtf.gz
@@ -24,10 +24,11 @@ nextflow run target/nextflow/workflows/pre_processing/main.nf \
--salmon_index testData/minimal_test/reference/salmon_index \
--skip_trimming false \
--trimmer trimgalore \
--remove_ribo_rna false \
--ribo_database_manifest src/assets/rrna-db-defaults.txt \
--remove_ribo_rna true \
--ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \
--skip_bbsplit true \
--bbsplit_index test_results/prepare_genome_test1/BBSplit_index \
--with_umi false \
-profile docker \
-resume

View File

@@ -35,7 +35,8 @@ argument_groups:
description: Skip BBSplit for removal of non-reference genome reads.
- name: "--bbsplit_fasta_list"
type: file
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
description: List of reference genomes (separated by ";") to filter reads against with BBSplit.
multiple: true
- name: "--star_index"
type: file
description: Path to directory or tar.gz archive for pre-built STAR index.
@@ -45,18 +46,12 @@ argument_groups:
- name: "--rsem_index"
type: file
description: Path to directory or tar.gz archive for pre-built RSEM index.
- name: extra_rsem_prepare_reference_args
type: string
description: Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline.
- name: "--salmon_index"
type: file
description: Path to directory or tar.gz archive for pre-built Salmon index.
- name: "--kallisto_index"
type: file
description: Path to directory or tar.gz archive for pre-built Kallisto index.
# - name: "--hisat2_index"
# type: file
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
- name: "--bbsplit_index"
type: file
description: Path to directory or tar.gz archive for pre-built BBSplit index.
@@ -125,10 +120,6 @@ argument_groups:
direction: output
description: Path to Kallisto index.
default: Kallisto_index
# - name: "--hisat2_index_uncompressed"
# type: file
# direction: output
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
- name: "--bbsplit_index_uncompressed"
type: file
direction: output
@@ -165,11 +156,12 @@ dependencies:
repository: craftbox
- name: star/star_genome_generate
repository: biobox
- name: bbmap_bbsplit
- name: bbmap/bbmap_bbsplit
repository: biobox
- name: salmon/salmon_index
repository: biobox
- name: kallisto/kallisto_index
# repository: biobox
repository: biobox
runners:
- type: executable

View File

@@ -161,17 +161,19 @@ workflow run_wf {
args: [output: "BBSplit_index"]
)
| map {id, state ->
  // Assemble the BBSplit reference list: the genome FASTA first, followed by
  // any additional references given through `bbsplit_fasta_list`.
  // `bbsplit_fasta_list` is optional; fall back to an empty list so that an
  // unset value does not append a null entry to the reference list.
  def extra_refs = state.bbsplit_fasta_list ?: []
  def ref = [state.fasta] + extra_refs
  [id, state + [bbsplit_ref: ref] ]
}
// create bbsplit index, if not already available
| bbmap_bbsplit.run (
runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index},
fromState: [
"primary_ref": "fasta",
"bbsplit_fasta_list": "bbsplit_fasta_list"
],
toState: [ "bbsplit_index": "bbsplit_index" ],
fromState: ["ref": "bbsplit_ref"],
toState: [ "bbsplit_index": "index" ],
args: [
only_build_index: true,
bbsplit_index: "BBSplit_index"
index: "BBSplit_index"
],
key: "generate_bbsplit_index"
)
@@ -254,12 +256,12 @@ workflow run_wf {
| kallisto_index.run(
runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index},
fromState: [
"transcriptome_fasta": "transcript_fasta",
"pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size"
"input": "transcript_fasta",
"kmer_size": "pseudo_aligner_kmer_size"
],
toState: [ "kallisto_index": "kallisto_index" ],
toState: [ "kallisto_index": "index" ],
key: "generate_kallisto_index",
args: [kallisto_index: "Kallisto_index"]
args: [index: "Kallisto_index"]
)
| map { id, state ->

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# viash ns build --setup cb --parallel -q prepare_genome
viash ns build --setup cb --parallel
# echo "Test 1: Annotation file format - GTF"
# nextflow run target/nextflow/workflows/prepare_genome/main.nf \
@@ -12,7 +12,7 @@
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
# --genotype false \
# --biotype gene_biotype \
# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
# -profile docker \
@@ -28,7 +28,7 @@
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
# --genotype false \
# --biotype gene_biotype \
# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
# -profile docker \
@@ -43,7 +43,7 @@ nextflow run target/nextflow/workflows/prepare_genome/main.nf \
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
--genotype false \
--biotype gene_biotype \
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
--bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
--pseudo_aligner kallisto \
--aligner star_rsem \
-profile docker \

View File

@@ -4,7 +4,7 @@ description: |
A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.
argument_groups:
- name: "Input"
- name: "Input"
arguments:
- name: "--id"
required: true
@@ -48,13 +48,13 @@ argument_groups:
description: Override library type inferred based on strandedness defined in meta object
default: ''
- name: "--kallisto_quant_fragment_length"
type: integer
type: double
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
- name: "--kallisto_quant_fragment_length_sd"
type: integer
type: double
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
- name: "Output"
- name: "Output"
arguments:
- name: "--pseudo_multiqc"
type: file
@@ -81,6 +81,7 @@ dependencies:
- name: salmon/salmon_quant
repository: biobox
- name: kallisto/kallisto_quant
repository: biobox
runners:
- type: executable

View File

@@ -59,19 +59,29 @@ workflow run_wf {
| kallisto_quant.run (
runIf: { id, state -> state.pseudo_aligner == 'kallisto'},
fromState: [
"input": "input",
"paired": "paired",
"gtf": "gtf",
"index": "kallisto_index",
"fragment_length": "kallisto_quant_fragment_length",
"fragment_length_sd": "kallisto_quant_fragment_length_sd"
],
toState: [
"quant_out_dir": "output",
"kallisto_quant_results_file": "quant_results_file",
"pseudo_multiqc": "log"
fromState: { id, state ->
def fr_stranded = state.strandedness == 'forward'
def rf_stranded = state.strandedness == 'reverse'
[
input: state.input,
index: state.kallisto_index,
fragment_length: state.kallisto_quant_fragment_length,
sd: state.kallisto_quant_fragment_length_sd,
single: !state.paired,
fr_stranded: fr_stranded,
rf_stranded: rf_stranded,
]
},
args: [log: "kallisto_quant.log"],
toState: { id, output_state, state ->
  // Merge the Kallisto outputs into the workflow state: the output directory,
  // the abundance table inside it, and the log file.
  // NOTE(review): the `+ "/abundance.tsv"` concatenation assumes `+` is valid
  // for the type of output_state["output_dir"] — confirm.
  def newKeys = [
    "quant_out_dir": output_state["output_dir"],
    "kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv",
    "pseudo_multiqc": output_state["log"]
  ]
  def new_state = state + newKeys
  return new_state
}
)
| map { id, state ->

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# viash ns build --setup cb -q pseudo_alignment_and_quant
viash ns build --setup cb --parallel #-q pseudo_alignment_and_quant
# Split error message from standard output
# viash ns list > /dev/null
@@ -16,30 +16,32 @@ WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse
RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
HERE
echo "> Test 1: Salmon qunatification"
nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
--publish_dir "test_results/pseudo_alignment_test1" \
--fasta testData/minimal_test/reference/genome.fasta \
--gtf testData/minimal_test/reference/genes.gtf.gz \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
--salmon_index testData/minimal_test/reference/salmon_index \
--pseudo_aligner salmon \
-profile docker \
-resume
# echo "> Test 2: Kallisto qunatification"
# echo "> Test 1: Salmon qunatification"
# nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
# --publish_dir "test_results/pseudo_alignment_test2" \
# --publish_dir "test_results/pseudo_alignment_test1" \
# --fasta testData/minimal_test/reference/genome.fasta \
# --gtf testData/minimal_test/reference/genes.gtf.gz \
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
# --kallisto_index test_results/prepare_genome_test3/Kallisto_index \
# --pseudo_aligner kallisto \
# --salmon_index testData/minimal_test/reference/salmon_index \
# --pseudo_aligner salmon \
# -profile docker \
# -resume
echo "> Test 2: Kallisto qunatification"
nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
--publish_dir "test_results/pseudo_alignment_test2" \
--fasta testData/minimal_test/reference/genome.fasta \
--gtf testData/minimal_test/reference/genes.gtf.gz \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
--kallisto_index test_results/prepare_genome_test3/Kallisto_index \
--pseudo_aligner kallisto \
--kallisto_quant_fragment_length 101.0 \
--kallisto_quant_fragment_length_sd 50.0 \
-profile docker \
-resume
echo "Removing reference data files"
rm testData/minimal_test/reference/genes.gtf
rm -r testData/minimal_test/reference/salmon_index

View File

@@ -112,9 +112,6 @@ argument_groups:
- name: "--biotype"
type: string
description: Biotype value to use while appending entries to GTF file when additional fasta file is provided.
- name: "--extra_featurecounts_args"
type: string
description: Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline
# RSeQC
- name: "--rseqc_modules"
@@ -207,12 +204,6 @@ argument_groups:
description: Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads.
# Qualimap
- name: "--output_format"
type: string
required: false
default: html
choices: [ html, pdf ]
description: Format of the qualimap output report (PDF or HTML, default is HTML)
- name: "--pr_bases"
type: integer
required: false
@@ -291,8 +282,6 @@ argument_groups:
- name: "--star_multiqc"
type: file
must_exist: false
# - name: "--hisat2_multiqc"
# type: file
- name: "--rsem_multiqc"
type: file
- name: "--genome_bam_stats"
@@ -503,17 +492,20 @@ argument_groups:
default: $id.intercept_slope.txt
# Qualimap
- name: "--qualimap_output_pdf"
- name: "--qualimap_qc_report"
direction: output
type: file
example: $id.rnaseq_qc_results.txt
description: Text file containing the RNAseq QC results.
- name: "--qualimap_counts"
type: file
direction: output
required: false
must_exist: false
default: $id.qualimap_output.pdf
- name: "--qualimap_output_dir"
description: Output file for computed counts.
- name: "--qualimap_report"
type: file
direction: output
required: false
default: $id.qualimap_output
example: $id.report.html
description: Report output file. Supported formats are PDF or HTML.
# DESeq2
- name: "--deseq2_output"
@@ -626,17 +618,19 @@ resources:
dependencies:
- name: rseqc/rseqc_bamstat
repository: biobox
- name: rseqc/rseqc_inferexperiment
- name: rseqc/rseqc_innerdistance
repository: biobox
- name: rseqc/rseqc_inner_distance
repository: biobox
- name: rseqc/rseqc_junctionannotation
- name: rseqc/rseqc_junctionsaturation
- name: rseqc/rseqc_readdistribution
- name: rseqc/rseqc_readduplication
- name: rseqc/rseqc_tin
- name: dupradar
- name: qualimap
# - name: qualimap/qualimap_rnaseq
# repository: biobox
- name: qualimap/qualimap_rnaseq
repository: biobox
- name: preseq_lcextrap
- name: featurecounts
repository: biobox
@@ -645,7 +639,7 @@ dependencies:
- name: prepare_multiqc_input
- name: multiqc
repository: biobox
- name: rsem/rsem_merge_counts
- name: rsem_merge_counts
- name: workflows/merge_quant_results
runners:

View File

@@ -65,18 +65,18 @@ workflow run_wf {
| rseqc_bamstat.run (
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align },
fromState: [
"input": "genome_bam",
"map_qual": "map_qual"
"input_file": "genome_bam",
"mapq": "map_qual"
],
toState: [ "bamstat_output": "output" ]
)
| rseqc_inferexperiment.run(
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align },
fromState: [
"input": "genome_bam",
"input_file": "genome_bam",
"refgene": "gene_bed",
"sample_size": "sample_size",
"map_qual": "map_qual"
"mapq": "map_qual"
],
toState: [ "strandedness_output": "output" ]
)
@@ -86,17 +86,17 @@ workflow run_wf {
def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true
[ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ]
}
| rseqc_innerdistance.run(
| rseqc_inner_distance.run(
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align },
key: "inner_distance",
fromState: [
"input": "genome_bam",
"input_file": "genome_bam",
"refgene": "gene_bed",
"sample_size": "sample_size",
"map_qual": "map_qual",
"lower_bound_size": "lower_bound_size",
"upper_bound_size": "upper_bound_size",
"step_size": "step_size"
"mapq": "map_qual",
"lower_bound": "lower_bound_size",
"upper_bound": "upper_bound_size",
"step": "step_size"
],
toState: [
"inner_dist_output_stats": "output_stats",
@@ -199,21 +199,23 @@ workflow run_wf {
]
)
| qualimap.run(
runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align },
// TODO: Add outdir as an output argument to the qualimap module on biobox.
// Qualimap outputs a few more raw data files to outdir, but since the module uses a temporary directory as its output dir these files are lost.
| qualimap_rnaseq.run(
fromState: [
"input": "genome_bam",
"bam": "genome_bam",
"gtf": "gtf",
"pr_bases": "pr_bases",
"tr_bias": "tr_bias",
"num_pr_bases": "pr_bases",
"num_tr_bias": "tr_bias",
"algorithm": "algorithm",
"sequencing_protocol": "sequencing_protocol",
"sorted": "sorted",
"java_memory_size": "java_memory_size",
],
toState: [
"qualimap_output_pdf": "output_pdf",
"qualimap_output_dir": "output_dir"
"qualimap_report": "report",
"qualimap_qc_report": "qc_report",
"qualimap_counts": "counts"
]
)
@@ -338,10 +340,10 @@ workflow run_wf {
(state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ?
state.preseq_output :
null }
def qualimap_output_dir = list.collect { id, state ->
(state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ?
state.qualimap_output_dir :
null }
// def qualimap_output_dir = list.collect { id, state ->
// (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ?
// state.qualimap_output_dir :
// null }
def dupradar_output_dup_intercept_mqc = list.collect { id, state ->
(state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ?
state.dupradar_output_dup_intercept_mqc :
@@ -426,7 +428,7 @@ workflow run_wf {
featurecounts_multiqc: featurecounts_multiqc,
featurecounts_rrna_multiqc: featurecounts_rrna_multiqc,
preseq_output: preseq_output,
qualimap_output_dir: qualimap_output_dir,
// qualimap_output_dir: qualimap_output_dir,
dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc,
dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc,
bamstat_output: bamstat_output,
@@ -605,7 +607,7 @@ workflow run_wf {
"pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo",
"pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo",
"preseq_multiqc": "preseq_output",
"qualimap_multiqc": "qualimap_output_dir",
// "qualimap_multiqc": "qualimap_output_dir",
"dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc",
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
"bamstat_multiqc": "bamstat_output",
@@ -705,8 +707,9 @@ workflow run_wf {
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
"qualimap_output_dir": "qualimap_output_dir",
"qualimap_output_pdf": "qualimap_output_pdf",
"qualimap_report": "qualimap_report",
"qualimap_qc_report": "qualimap_qc_report",
"qualimap_counts": "qualimap_counts",
"featurecounts": "featurecounts",
"featurecounts_summary": "featurecounts_summary",
"featurecounts_multiqc": "featurecounts_multiqc",

View File

@@ -70,13 +70,6 @@ argument_groups:
- name: "--kallisto_index"
type: file
description: Path to directory or tar.gz archive for pre-built Kallisto index.
# - name: "--hisat2_index"
# type: file
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
# - name: "--hisat2_build_memory"
# type: string
# description: Minimum memory required to use splice sites and exons in the HiSAT2 index build process.
# default: 200.GB
- name: "--gencode"
type: boolean_true
description: Specify if the GTF annotation is in GENCODE format.
@@ -107,12 +100,6 @@ argument_groups:
description: Specify the trimming tool to use.
choices: ["trimgalore", "fastp"]
default: "trimgalore"
- name: "--extra_trimgalore_args"
type: string
description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.
- name: "--extra_fastp_args"
type: string
description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.
- name: "--min_trimmed_reads"
type: integer
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
@@ -122,7 +109,8 @@ argument_groups:
arguments:
- name: "--bbsplit_fasta_list"
type: file
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
description: List of reference genomes (separated by ";") to filter reads against with BBSplit.
multiple: true
- name: "--bbsplit_index"
type: file
description: Path to directory or tar.gz archive for pre-built BBSplit index.
@@ -185,10 +173,10 @@ argument_groups:
description: Kmer length passed to indexing step of pseudoaligners.
default: 31
- name: "--kallisto_quant_fragment_length"
type: integer
type: double
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
- name: "--kallisto_quant_fragment_length_sd"
type: integer
type: double
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
- name: "--bam_csi_index"
type: boolean_true
@@ -196,10 +184,6 @@ argument_groups:
- name: "--salmon_quant_libtype"
type: string
description: Override Salmon library type inferred based on strandedness defined in meta object.
- name: "--extra_salmon_quant_args"
type: string
default: '-v'
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
- name: "--min_mapped_reads"
type: integer
description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing.
@@ -223,10 +207,6 @@ argument_groups:
- name: "--skip_pseudo_alignment"
type: boolean_true
description: Skip all of the pseudo-alignment-based processes within the pipeline.
- name: --extra_rsem_calculate_expression_args
type: string
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
default: '--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1'
- name: Process skipping options
arguments:
@@ -281,18 +261,10 @@ argument_groups:
- name: Other process arguments
arguments:
- name: "--extra_fq_subsample_args"
type: string
default: ' --record-count 1000000 --seed 1'
description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.
- name: "--extra_picard_args"
type: string
default: ' --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline.
- name: "--extra_bedtools_args"
type: string
default: ' -split -du'
description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.
- name: "--extra_preseq_args"
type: string
description: Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline
@@ -367,14 +339,14 @@ argument_groups:
required: false
must_exist: false
description: Path to output directory
default: fastq/$id.read_1.fastq.gz
default: fastq/${id}_r1.fastq.gz
- name: "--output_fastq_2"
type: file
direction: output
required: false
must_exist: false
description: Path to output directory
default: fastq/$id.read_2.fastq.gz
default: fastq/${id}_r2.fastq.gz
# FastQC
- name: "--fastqc_html_1"
@@ -383,52 +355,52 @@ argument_groups:
description: FastQC HTML report for read 1.
required: false
must_exist: false
default: fastqc_raw/$id.read_1.fastqc.html
default: fastqc_raw/${id}_r1.fastqc.html
- name: "--fastqc_html_2"
type: file
direction: output
description: FastQC HTML report for read 2.
required: false
must_exist: false
default: fastqc_raw/$id.read_2.fastqc.html
default: fastqc_raw/${id}_r2.fastqc.html
- name: "--fastqc_zip_1"
type: file
direction: output
description: FastQC report archive for read 1.
required: false
must_exist: false
default: fastqc_raw/$id.read_1.fastqc.zip
default: fastqc_raw/${id}_r1.fastqc.zip
- name: "--fastqc_zip_2"
type: file
direction: output
description: FastQC report archive for read 2.
required: false
must_exist: false
default: fastqc_raw/$id.read_2.fastqc.zip
default: fastqc_raw/${id}_r2.fastqc.zip
- name: "--trim_html_1"
type: file
direction: output
required: false
must_exist: false
default: fastqc_trim/$id.read_1.trimmed_fastqc.html
default: fastqc_trim/${id}_r1.trimmed_fastqc.html
- name: "--trim_html_2"
type: file
direction: output
required: false
must_exist: false
default: fastqc_trim/$id.read_2.trimmed_fastqc.html
default: fastqc_trim/${id}_r2.trimmed_fastqc.html
- name: "--trim_zip_1"
type: file
direction: output
required: false
must_exist: false
default: fastqc_trim/$id.read_1.trimmed_fastqc.zip
default: fastqc_trim/${id}_r1.trimmed_fastqc.zip
- name: "--trim_zip_2"
type: file
direction: output
required: false
must_exist: false
default: fastqc_trim/$id.read_2.trimmed_fastqc.zip
default: fastqc_trim/${id}_r2.trimmed_fastqc.zip
# TrimGalore
- name: "--trim_log_1"
@@ -436,13 +408,13 @@ argument_groups:
direction: output
required: false
must_exist: false
default: trimgalore/$id.read_1.trimming_report.txt
default: trimgalore/${id}_r1.trimming_report.txt
- name: "--trim_log_2"
type: file
direction: output
required: false
must_exist: false
default: trimgalore/$id.read_2.trimming_report.txt
default: trimgalore/${id}_r2.trimming_report.txt
# fastp
- name: --fastp_trim_json
@@ -842,17 +814,21 @@ argument_groups:
default: dupradar/intercept_slope/$id.intercept_slope.txt
# Qualimap
- name: "--qualimap_output_pdf"
- name: "--qualimap_qc_report"
direction: output
type: file
default: Qualimap/$id.rnaseq_qc_results.txt
description: Text file containing the RNAseq QC results.
- name: "--qualimap_counts"
type: file
direction: output
required: false
must_exist: false
default: qualimap/$id.qualimap_output.pdf
- name: "--qualimap_output_dir"
default: Qualimap/$id.counts.txt
description: Output file for computed counts.
- name: "--qualimap_report"
type: file
direction: output
required: false
default: qualimap/$id
default: Qualimap/$id.report.html
description: Report output file. Supported formats are PDF or HTML.
# DESeq2
- name: "--deseq2_output"

View File

@@ -419,8 +419,9 @@ workflow run_wf {
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
"qualimap_output_dir": "qualimap_output_dir",
"qualimap_output_pdf": "qualimap_output_pdf",
"qualimap_report": "qualimap_report",
"qualimap_qc_report": "qualimap_qc_report",
"qualimap_counts": "qualimap_counts",
"featurecounts": "featurecounts",
"featurecounts_summary": "featurecounts_summary",
"featurecounts_multiqc": "featurecounts_multiqc",
@@ -534,8 +535,9 @@ workflow run_wf {
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
"qualimap_output_dir": "qualimap_output_dir",
"qualimap_output_pdf": "qualimap_output_pdf",
"qualimap_report": "qualimap_report",
"qualimap_qc_report": "qualimap_qc_report",
"qualimap_counts": "qualimap_counts",
"tpm_gene": "tpm_gene",
"counts_gene": "counts_gene",
"counts_gene_length_scaled": "counts_gene_length_scaled",

View File

@@ -1,6 +1,6 @@
#!/bin/bash
# viash ns build --setup cb --parallel
viash ns build --setup cb --parallel
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
id,fastq_1,fastq_2,strandedness
@@ -19,7 +19,7 @@ nextflow run target/nextflow/workflows/rnaseq/main.nf \
--gtf testData/minimal_test/reference/genes.gtf.gz \
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
--bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
--skip_pseudo_alignment \
-profile docker \
--resume

View File

@@ -0,0 +1,395 @@
# Component config for BBSplit, published in the "bbmap" namespace.
name: "bbmap_bbsplit"
namespace: "bbmap"
version: "main"
argument_groups:
# Input group: sample identity, reads, references/index and mapping options.
- name: "Input"
arguments:
# Optional sample identifier.
- type: "string"
name: "--id"
description: "Sample ID"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Flag (boolean_true): marks the reads as paired, per the description.
- type: "boolean_true"
name: "--paired"
description: "Paired fastq files or not?"
info: null
direction: "input"
# One or two FASTQ paths joined with ";" (multiple: true).
- type: "file"
name: "--input"
description: "Input fastq files, either one or two (paired), separated by \";\"\
."
info: null
example:
- "reads.fastq"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
# ";"-separated reference FASTAs; per the description the primary reference
# is listed first.
- type: "file"
name: "--ref"
description: "Reference FASTA files, separated by \";\". The primary reference\
\ should be specified first."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
# Flag: only build the index instead of mapping (per the description).
- type: "boolean_true"
name: "--only_build_index"
description: "If set, only builds the index. Otherwise, mapping is performed."
info: null
direction: "input"
# Existing index to map against.
- type: "file"
name: "--build"
description: "Index to be used for mapping. \n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# The remaining entries of this group are mapping options; none of them is
# marked as required.
- type: "string"
name: "--qin"
description: "Set to 33 or 64 to specify input quality value ASCII offset. Automatically\
\ detected if\nnot specified.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--interleaved"
description: "True forces paired/interleaved input; false forces single-ended\
\ mapping.\nIf not specified, interleaved status will be autodetected from read\
\ names.\n"
info: null
direction: "input"
- type: "integer"
name: "--maxindel"
description: "Don't look for indels longer than this. Lower is faster. Set to\
\ >=100k for RNA-seq.\n"
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--minratio"
description: "Fraction of max alignment score required to keep a site. Higher\
\ is faster.\n"
info: null
example:
- 0.56
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--minhits"
description: "Minimum number of seed hits required for candidate sites. Higher\
\ is faster.\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Value is restricted to the listed choices.
- type: "string"
name: "--ambiguous"
description: "Set behavior on ambiguously-mapped reads (with multiple top-scoring\
\ mapping locations).\n * best Use the first best site (Default)\n * toss\
\ Consider unmapped\n * random Select one top-scoring site randomly\n \
\ * all Retain all top-scoring sites. Does not work yet with SAM output\n"
info: null
example:
- "best"
required: false
choices:
- "best"
- "toss"
- "random"
- "all"
direction: "input"
multiple: false
multiple_sep: ";"
# Value is restricted to the listed choices.
- type: "string"
name: "--ambiguous2"
description: "Set behavior only for reads that map ambiguously to multiple different\
\ references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\n\
Ambiguous2 excludes reads that map ambiguously within a single reference.\n\
\ * best Use the first best site (Default)\n * toss Consider unmapped\n\
\ * all Write a copy to the output for each reference to which it maps\n\
\ * split Write a copy to the AMBIGUOUS_ output for each reference to which\
\ it maps\n"
info: null
example:
- "best"
required: false
choices:
- "best"
- "toss"
- "all"
- "split"
direction: "input"
multiple: false
multiple_sep: ";"
# Value is restricted to the listed choices.
- type: "string"
name: "--qtrim"
description: "Quality-trim ends to Q5 before mapping. Options are 'l' (left),\
\ 'r' (right), and 'lr' (both).\n"
info: null
required: false
choices:
- "l"
- "r"
- "lr"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--untrim"
description: "Undo trimming after mapping. Untrimmed bases will be soft-clipped\
\ in cigar strings."
info: null
direction: "input"
# Output group: index location, split reads, helper script and statistics.
# NOTE(review): the output files below carry must_exist: true; presumably a
# generated default rather than a requirement on pre-existing files - confirm.
- name: "Output"
arguments:
- type: "file"
name: "--index"
description: "Location to write the index.\n"
info: null
example:
- "BBSplit_index"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_1"
description: "Output file for read 1.\n"
info: null
example:
- "read_out1.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--fastq_2"
description: "Output file for read 2.\n"
info: null
example:
- "read_out2.fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Also reachable through the alternative flag "--bs".
- type: "file"
name: "--sam2bam"
alternatives:
- "--bs"
description: "Write a shell script to 'file' that will turn the sam output into\
\ a sorted, indexed bam file.\n"
info: null
example:
- "script.sh"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--scafstats"
description: "Write statistics on how many reads mapped to which scaffold to this\
\ file.\n"
info: null
example:
- "scaffold_stats.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--refstats"
description: "Write statistics on how many reads were assigned to which reference\
\ to this file.\nUnmapped reads whose mate mapped to a reference are considered\
\ assigned and will be counted.\n"
info: null
example:
- "reference_stats.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# NOTE(review): --nzo and --bbmap_args have direction "input" although they
# are listed in the Output group - confirm this grouping is intended.
- type: "boolean_true"
name: "--nzo"
description: "Only print lines with nonzero coverage."
info: null
direction: "input"
# Free-form string of extra BBMap arguments (per the description).
- type: "string"
name: "--bbmap_args"
description: "Additional arguments from BBMap to pass to BBSplit.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Script resource of the component.
resources:
- type: 'bash_script'
  path: 'script.sh'
  is_executable: true
description: >-
  Split sequencing reads by mapping them to multiple references
  simultaneously.
# Script resource used for testing the component.
test_resources:
- type: 'bash_script'
  path: 'test.sh'
  is_executable: true
info: null
status: 'enabled'
# Commands listed as requirements of the component.
requirements:
  commands:
  - 'ps'
license: 'BBTools Copyright (c) 2014'
# Upstream project links.
links:
  repository: 'https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh'
  homepage: 'https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/'
  documentation: 'https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/'
# Runners: a plain executable and a Nextflow module.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
# Nextflow process tag is taken from the "$id" variable.
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Named labels mapping to Nextflow resource settings.
labels:
# Memory labels in bytes, decimal units (gb/tb = powers of 1000).
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
# Memory labels in bytes, binary units (gib/tib = powers of 1024).
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
# CPU-count labels.
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Engines: a Docker image built on ubuntu:22.04, plus a native engine.
engines:
- type: 'docker'
  id: 'docker'
  image: 'ubuntu:22.04'
  target_registry: 'images.viash-hub.com'
  target_tag: 'main'
  namespace_separator: '/'
  setup:
  # Installs the build/runtime packages, downloads the BBMap 39.01 tarball
  # and copies its contents into /usr/local/bin.
  # NOTE(review): wget runs with --no-check-certificate, so the download is
  # not TLS-verified - confirm whether this is still needed.
  - type: 'docker'
    run:
    - |
      apt-get update && \
      apt-get install -y build-essential openjdk-17-jdk wget tar && \
      wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \
      tar xzf BBMap_39.01.tar.gz && \
      cp -r bbmap/* /usr/local/bin
  # Writes the reported BBMap version to /var/software_versions.txt.
  - type: 'docker'
    run:
    - |
      bbsplit.sh --version 2>&1 | awk '/BBMap version/{print "BBMAP:", $NF}' > /var/software_versions.txt
  entrypoint: []
  cmd: null
- type: 'native'
  id: 'native'
# Build provenance: source config, runner/engine, output location and the
# git state the component was built from.
build_info:
  config: "src/bbmap/bbmap_bbsplit/config.vsh.yaml"
  runner: "nextflow"
  engine: "docker|native"
  output: "target/nextflow/bbmap/bbmap_bbsplit"
  executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf"
  viash_version: "0.9.0"
  git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
  # Credential-free remote URL. The CI access token that was embedded in this
  # URL (x-access-token:...@) must never be committed; revoke it if it is
  # still valid.
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-26-ga13b57d"
# Package-level settings of the "biobox" package this component belongs to.
package_config:
  name: 'biobox'
  version: 'main'
  description: |
    A collection of bioinformatics tools for working with sequence data.
  info: null
  viash_version: '0.9.0'
  source: 'src'
  target: 'target'
  # Config modifications recorded for the package: a 'ps' command requirement,
  # an extra native engine, and the Docker target registry and tag.
  config_mods:
  - |
    .requirements.commands := ['ps']
  - '.engines += { type: "native" }'
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - 'bioinformatics'
  - 'modules'
  - 'sequencing'
  license: 'MIT'
  organization: 'vsh'
  links:
    repository: 'https://github.com/viash-hub/biobox'
    issue_tracker: 'https://github.com/viash-hub/biobox/issues'

View File

@@ -1,4 +1,4 @@
// umitools_extract main
// bbmap_bbsplit main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -2804,19 +2804,91 @@ nextflow.enable.dsl=2
meta = [
"resources_dir": moduleDir.toRealPath().normalize(),
"config": processConfig(readJsonBlob('''{
"name" : "umitools_extract",
"namespace" : "umitools",
"name" : "bbmap_bbsplit",
"namespace" : "bbmap",
"version" : "main",
"argument_groups" : [
{
"name" : "Input",
"arguments" : [
{
"type" : "boolean",
"type" : "string",
"name" : "--id",
"description" : "Sample ID",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--paired",
"description" : "Paired fastq files or not?",
"default" : [
false
"direction" : "input"
},
{
"type" : "file",
"name" : "--input",
"description" : "Input fastq files, either one or two (paired), separated by \\";\\".",
"example" : [
"reads.fastq"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--ref",
"description" : "Reference FASTA files, separated by \\";\\". The primary reference should be specified first.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--only_build_index",
"description" : "If set, only builds the index. Otherwise, mapping is performed.",
"direction" : "input"
},
{
"type" : "file",
"name" : "--build",
"description" : "Index to be used for mapping. \n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--qin",
"description" : "Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--interleaved",
"description" : "True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--maxindel",
"description" : "Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq.\n",
"example" : [
20
],
"required" : false,
"direction" : "input",
@@ -2824,27 +2896,84 @@ meta = [
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--input",
"description" : "Input fastq files, either one or two (paired)",
"type" : "double",
"name" : "--minratio",
"description" : "Fraction of max alignment score required to keep a site. Higher is faster.\n",
"example" : [
"sample.fastq"
0.56
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ","
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--minhits",
"description" : "Minimum number of seed hits required for candidate sites. Higher is faster.\n",
"example" : [
1
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--bc_pattern",
"description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI.",
"name" : "--ambiguous",
"description" : "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n",
"example" : [
"best"
],
"required" : false,
"choices" : [
"best",
"toss",
"random",
"all"
],
"direction" : "input",
"multiple" : true,
"multiple_sep" : ","
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--ambiguous2",
"description" : "Set behavior only for reads that map ambiguously to multiple different references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n",
"example" : [
"best"
],
"required" : false,
"choices" : [
"best",
"toss",
"all",
"split"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--qtrim",
"description" : "Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both).\n",
"required" : false,
"choices" : [
"l",
"r",
"lr"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--untrim",
"description" : "Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings.",
"direction" : "input"
}
]
},
@@ -2853,14 +2982,28 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--fastq_1",
"description" : "Output file for read 1.",
"default" : [
"$id.$key.read_1.fastq"
"name" : "--index",
"description" : "Location to write the index.\n",
"example" : [
"BBSplit_index"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--fastq_1",
"description" : "Output file for read 1.\n",
"example" : [
"read_out1.fastq"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
@@ -2868,82 +3011,73 @@ meta = [
{
"type" : "file",
"name" : "--fastq_2",
"description" : "Output file for read 2.",
"default" : [
"$id.$key.read_2.fastq"
"description" : "Output file for read 2.\n",
"example" : [
"read_out2.fastq"
],
"must_exist" : false,
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Optional arguments",
"arguments" : [
{
"type" : "string",
"name" : "--umitools_extract_method",
"description" : "UMI pattern to use.",
"default" : [
"string"
"type" : "file",
"name" : "--sam2bam",
"alternatives" : [
"--bs"
],
"description" : "Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file.\n",
"example" : [
"script.sh"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"choices" : [
"string",
"regex"
],
"direction" : "input",
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--umitools_umi_separator",
"description" : "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.",
"default" : [
"_"
"type" : "file",
"name" : "--scafstats",
"description" : "Write statistics on how many reads mapped to which scaffold to this file.\n",
"example" : [
"scaffold_stats.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--umitools_grouping_method",
"description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.",
"default" : [
"directional"
"type" : "file",
"name" : "--refstats",
"description" : "Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n",
"example" : [
"reference_stats.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"choices" : [
"unique",
"percentile",
"cluster",
"adjacency",
"directional"
],
"direction" : "input",
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--umi_discard_read",
"description" : "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.",
"default" : [
0
],
"type" : "boolean_true",
"name" : "--nzo",
"description" : "Only print lines with nonzero coverage.",
"direction" : "input"
},
{
"type" : "string",
"name" : "--bbmap_args",
"description" : "Additional arguments from BBMap to pass to BBSplit.\n",
"required" : false,
"choices" : [
0,
1,
2
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
@@ -2958,56 +3092,26 @@ meta = [
"is_executable" : true
}
],
"description" : "UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.\nThis component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place\n",
"description" : "Split sequencing reads by mapping them to multiple references simultaneously.",
"test_resources" : [
{
"type" : "bash_script",
"path" : "test.sh",
"is_executable" : true
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/scrb_seq_fastq.1.gz"
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/scrb_seq_fastq.2.gz"
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/slim.fastq.gz"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/umitools/extract/main.nf",
"modules/nf-core/umitools/extract/meta.yml"
],
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
"license" : "BBTools Copyright (c) 2014",
"links" : {
"repository" : "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh",
"homepage" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/",
"documentation" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"runners" : [
{
"type" : "executable",
@@ -3092,19 +3196,16 @@ meta = [
"namespace_separator" : "/",
"setup" : [
{
"type" : "apt",
"packages" : [
"pip"
],
"interactive" : false
"type" : "docker",
"run" : [
"apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget tar && \\\\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \\\\\ntar xzf BBMap_39.01.tar.gz && \\\\\ncp -r bbmap/* /usr/local/bin\n"
]
},
{
"type" : "python",
"user" : false,
"packages" : [
"umi_tools"
],
"upgrade" : true
"type" : "docker",
"run" : [
"bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \\"BBMAP:\\", $NF}' > /var/software_versions.txt\n"
]
}
]
},
@@ -3114,49 +3215,39 @@ meta = [
}
],
"build_info" : {
"config" : "/workdir/root/repo/src/umitools/umitools_extract/config.vsh.yaml",
"config" : "/workdir/root/repo/src/bbmap/bbmap_bbsplit/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/umitools/umitools_extract",
"output" : "target/nextflow/bbmap/bbmap_bbsplit",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3170,17 +3261,33 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi )
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_BC_PATTERN+x} ]; then echo "${VIASH_PAR_BC_PATTERN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bc_pattern='&'#" ; else echo "# par_bc_pattern="; fi )
$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi )
$( if [ ! -z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi )
$( if [ ! -z ${VIASH_PAR_BUILD+x} ]; then echo "${VIASH_PAR_BUILD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_build='&'#" ; else echo "# par_build="; fi )
$( if [ ! -z ${VIASH_PAR_QIN+x} ]; then echo "${VIASH_PAR_QIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qin='&'#" ; else echo "# par_qin="; fi )
$( if [ ! -z ${VIASH_PAR_INTERLEAVED+x} ]; then echo "${VIASH_PAR_INTERLEAVED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved='&'#" ; else echo "# par_interleaved="; fi )
$( if [ ! -z ${VIASH_PAR_MAXINDEL+x} ]; then echo "${VIASH_PAR_MAXINDEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maxindel='&'#" ; else echo "# par_maxindel="; fi )
$( if [ ! -z ${VIASH_PAR_MINRATIO+x} ]; then echo "${VIASH_PAR_MINRATIO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minratio='&'#" ; else echo "# par_minratio="; fi )
$( if [ ! -z ${VIASH_PAR_MINHITS+x} ]; then echo "${VIASH_PAR_MINHITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minhits='&'#" ; else echo "# par_minhits="; fi )
$( if [ ! -z ${VIASH_PAR_AMBIGUOUS+x} ]; then echo "${VIASH_PAR_AMBIGUOUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous='&'#" ; else echo "# par_ambiguous="; fi )
$( if [ ! -z ${VIASH_PAR_AMBIGUOUS2+x} ]; then echo "${VIASH_PAR_AMBIGUOUS2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous2='&'#" ; else echo "# par_ambiguous2="; fi )
$( if [ ! -z ${VIASH_PAR_QTRIM+x} ]; then echo "${VIASH_PAR_QTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qtrim='&'#" ; else echo "# par_qtrim="; fi )
$( if [ ! -z ${VIASH_PAR_UNTRIM+x} ]; then echo "${VIASH_PAR_UNTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_untrim='&'#" ; else echo "# par_untrim="; fi )
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi )
$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi )
$( if [ ! -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_EXTRACT_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_extract_method='&'#" ; else echo "# par_umitools_extract_method="; fi )
$( if [ ! -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMITOOLS_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_umi_separator='&'#" ; else echo "# par_umitools_umi_separator="; fi )
$( if [ ! -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_GROUPING_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_grouping_method='&'#" ; else echo "# par_umitools_grouping_method="; fi )
$( if [ ! -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then echo "${VIASH_PAR_UMI_DISCARD_READ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_discard_read='&'#" ; else echo "# par_umi_discard_read="; fi )
$( if [ ! -z ${VIASH_PAR_SAM2BAM+x} ]; then echo "${VIASH_PAR_SAM2BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam2bam='&'#" ; else echo "# par_sam2bam="; fi )
$( if [ ! -z ${VIASH_PAR_SCAFSTATS+x} ]; then echo "${VIASH_PAR_SCAFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scafstats='&'#" ; else echo "# par_scafstats="; fi )
$( if [ ! -z ${VIASH_PAR_REFSTATS+x} ]; then echo "${VIASH_PAR_REFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refstats='&'#" ; else echo "# par_refstats="; fi )
$( if [ ! -z ${VIASH_PAR_NZO+x} ]; then echo "${VIASH_PAR_NZO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nzo='&'#" ; else echo "# par_nzo="; fi )
$( if [ ! -z ${VIASH_PAR_BBMAP_ARGS+x} ]; then echo "${VIASH_PAR_BBMAP_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbmap_args='&'#" ; else echo "# par_bbmap_args="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3201,7 +3308,6 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
set -eo pipefail
@@ -3210,56 +3316,85 @@ function clean_up {
}
trap clean_up EXIT
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX")
unset_if_false=( par_paired par_only_build_index par_interleaved par_untrim par_nzo)
IFS="," read -ra input <<< "\\$par_input"
IFS="," read -ra pattern <<< "\\$par_bc_pattern"
read_count="\\${#input[@]}"
pattern_count="\\${#pattern[@]}"
if [ "\\$par_paired" == "true" ]; then
echo "Paired - Reads: \\$read_count bc_patterns: \\$pattern_count"
if [ "\\$read_count" -ne 2 ] || [ "\\$pattern_count" -ne 2 ]; then
echo "Paired end input requires two read files and two UMI patterns"
exit 1
else
read1="\\$(basename -- \\${input[0]})"
read2="\\$(basename -- \\${input[1]})"
umi_tools extract \\\\
-I "\\${input[0]}" --read2-in="\\${input[1]}" \\\\
-S "\\$tmpdir/\\$read1" \\\\
--read2-out="\\$tmpdir/\\$read2" \\\\
--extract-method \\$par_umitools_extract_method \\\\
--bc-pattern "\\${pattern[0]}" \\\\
--bc-pattern2 "\\${pattern[1]}" \\\\
--umi-separator \\$par_umitools_umi_separator
if [ \\$par_umi_discard_read == 1 ]; then
# discard read 1
cp \\$tmpdir/\\$read1 \\$par_fastq_1
elif [ \\$par_umi_discard_read == 2 ]; then
# discard read 2
cp \\$tmpdir/\\$read2 \\$par_fastq_1
else
cp \\$tmpdir/\\$read1 \\$par_fastq_1
cp \\$tmpdir/\\$read2 \\$par_fastq_2
for var in "\\${unset_if_false[@]}"; do
if [ -z "\\${!var}" ]; then
unset \\$var
fi
done
if [ ! -d "\\$par_build" ]; then
IFS=";" read -ra ref_files <<< "\\$par_ref"
primary_ref="\\${ref_files[0]}"
refs=()
for file in "\\${ref_files[@]:1}"
do
name=\\$(basename "\\$file" | sed 's/\\\\.[^.]*\\$//')
refs+=("ref_\\$name=\\$file")
done
fi
if \\$par_only_build_index; then
if [ "\\${#refs[@]}" -gt 1 ]; then
bbsplit.sh \\\\
--ref_primary="\\$primary_ref" \\\\
"\\${refs[@]}" \\\\
path=\\$par_index
else
echo "ERROR: Please specify at least two reference fasta files."
fi
else
echo "Not Paired - \\$read_count"
if [ "\\$read_count" -ne 1 ] || [ "\\$pattern_count" -ne 1 ]; then
echo "Single end input requires one read file and one UMI pattern"
exit 1
IFS=";" read -ra input <<< "\\$par_input"
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX")
index_files=''
if [ -d "\\$par_build" ]; then
index_files="path=\\$par_build"
elif [ \\${#refs[@]} -gt 0 ]; then
index_files="--ref_primary=\\$primary_ref \\${refs[*]}"
else
read1="\\$(basename -- \\${input[0]})"
umi_tools extract \\\\
-I "\\${input[0]}" -S "\\$tmpdir/\\$read1" \\\\
--extract-method \\$par_umitools_extract_method \\\\
--bc-pattern "\\${pattern[0]}" \\\\
--umi-separator \\$par_umitools_umi_separator
cp \\$tmpdir/\\$read1 \\$par_fastq_1
echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files."
fi
extra_args=""
if [ -f "\\$par_refstats" ]; then extra_args+=" --refstats \\$par_refstats"; fi
if [ -n "\\$par_ambiguous" ]; then extra_args+=" --ambiguous \\$par_ambiguous"; fi
if [ -n "\\$par_ambiguous2" ]; then extra_args+=" --ambiguous2 \\$par_ambiguous2"; fi
if [ -n "\\$par_minratio" ]; then extra_args+=" --minratio \\$par_minratio"; fi
if [ -n "\\$par_minhits" ]; then extra_args+=" --minhits \\$par_minhits"; fi
if [ -n "\\$par_maxindel" ]; then extra_args+=" --maxindel \\$par_maxindel"; fi
if [ -n "\\$par_qin" ]; then extra_args+=" --qin \\$par_qin"; fi
if [ -n "\\$par_qtrim" ]; then extra_args+=" --qtrim \\$par_qtrim"; fi
if [ "\\$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi
if [ "\\$par_untrim" = true ]; then extra_args+=" --untrim"; fi
if [ "\\$par_nzo" = true ]; then extra_args+=" --nzo"; fi
if [ -n "\\$par_bbmap_args" ]; then extra_args+=" \\$par_bbmap_args"; fi
if \\$par_paired; then
bbsplit.sh \\\\
\\$index_files \\\\
in=\\${input[0]} \\\\
in2=\\${input[1]} \\\\
basename=\\${tmpdir}/%_#.fastq \\\\
\\$extra_args
read1=\\$(find \\$tmpdir/ -iname primary_1*)
read2=\\$(find \\$tmpdir/ -iname primary_2*)
cp \\$read1 \\$par_fastq_1
cp \\$read2 \\$par_fastq_2
else
bbsplit.sh \\\\
\\$index_files \\\\
in=\\${input[0]} \\\\
basename=\\${tmpdir}/%.fastq \\\\
\\$extra_args
read1=\\$(find \\$tmpdir/ -iname primary*)
cp \\$read1 \\$par_fastq_1
fi
fi
exit 0
VIASHMAIN
bash "$tempscript"
'''
@@ -3620,7 +3755,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/umitools/umitools_extract",
"image" : "vsh/biobox/bbmap/bbmap_bbsplit",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -1,9 +1,9 @@
manifest {
name = 'bbmap_bbsplit'
name = 'bbmap/bbmap_bbsplit'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Split sequencing reads by mapping them to multiple references simultaneously.\n'
description = 'Split sequencing reads by mapping them to multiple references simultaneously.'
}
process.container = 'nextflow/bash:latest'

View File

@@ -0,0 +1,321 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "bbmap_bbsplit",
"description": "Split sequencing reads by mapping them to multiple references simultaneously.",
"type": "object",
"definitions": {
"input" : {
"title": "Input",
"type": "object",
"description": "No description",
"properties": {
"id": {
"type":
"string",
"description": "Type: `string`. Sample ID",
"help_text": "Type: `string`. Sample ID"
}
,
"paired": {
  "type": "boolean",
  "description": "Type: `boolean_true`, default: `false`. Paired fastq files or not?",
  "help_text": "Type: `boolean_true`, default: `false`. Paired fastq files or not?",
  "default": false
}
,
"input": {
"type":
"string",
"description": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"",
"help_text": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"."
}
,
"ref": {
"type":
"string",
"description": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\"",
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\". The primary reference should be specified first."
}
,
"only_build_index": {
  "type": "boolean",
  "description": "Type: `boolean_true`, default: `false`. If set, only builds the index",
  "help_text": "Type: `boolean_true`, default: `false`. If set, only builds the index. Otherwise, mapping is performed.",
  "default": false
}
,
"build": {
"type":
"string",
"description": "Type: `file`. Index to be used for mapping",
"help_text": "Type: `file`. Index to be used for mapping. \n"
}
,
"qin": {
"type":
"string",
"description": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset",
"help_text": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n"
}
,
"interleaved": {
  "type": "boolean",
  "description": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping",
  "help_text": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n",
  "default": false
}
,
"maxindel": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this",
"help_text": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this. Lower is faster. Set to \u003e=100k for RNA-seq.\n"
}
,
"minratio": {
"type":
"number",
"description": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site",
"help_text": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site. Higher is faster.\n"
}
,
"minhits": {
"type":
"integer",
"description": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites",
"help_text": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites. Higher is faster.\n"
}
,
"ambiguous": {
"type":
"string",
"description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)",
"help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n",
"enum": ["best", "toss", "random", "all"]
}
,
"ambiguous2": {
"type":
"string",
"description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references",
"help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references.\nNormal \u0027ambiguous=\u0027 controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n",
"enum": ["best", "toss", "all", "split"]
}
,
"qtrim": {
"type":
"string",
"description": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping",
"help_text": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping. Options are \u0027l\u0027 (left), \u0027r\u0027 (right), and \u0027lr\u0027 (both).\n",
"enum": ["l", "r", "lr"]
}
,
"untrim": {
  "type": "boolean",
  "description": "Type: `boolean_true`, default: `false`. Undo trimming after mapping",
  "help_text": "Type: `boolean_true`, default: `false`. Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings.",
  "default": false
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"index": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index",
"help_text": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index.\n"
,
"default": "$id.$key.index.index"
}
,
"fastq_1": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1",
"help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1.\n"
,
"default": "$id.$key.fastq_1.fastq"
}
,
"fastq_2": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2",
"help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2.\n"
,
"default": "$id.$key.fastq_2.fastq"
}
,
"sam2bam": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file",
"help_text": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file.\n"
,
"default": "$id.$key.sam2bam.sh"
}
,
"scafstats": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file",
"help_text": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file.\n"
,
"default": "$id.$key.scafstats.txt"
}
,
"refstats": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file",
"help_text": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n"
,
"default": "$id.$key.refstats.txt"
}
,
"nzo": {
  "type": "boolean",
  "description": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage",
  "help_text": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage.",
  "default": false
}
,
"bbmap_args": {
"type":
"string",
"description": "Type: `string`. Additional arguments from BBMap to pass to BBSplit",
"help_text": "Type: `string`. Additional arguments from BBMap to pass to BBSplit.\n"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,364 @@
name: "bedtools_genomecov"
namespace: "bedtools"
version: "main"
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "The input file (BED/GFF/VCF) to be used.\n"
info: null
example:
- "input.bed"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_bam"
alternatives:
- "-ibam"
description: "The input file is in BAM format.\nNote: BAM _must_ be sorted by\
\ positions.\n'--genome' option is ignored if you use '--input_bam' option!\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--genome"
alternatives:
- "-g"
description: "The genome file to be used.\n"
info: null
example:
- "genome.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "The output BED file. \n"
info: null
example:
- "output.bed"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--depth"
alternatives:
- "-d"
description: "Report the depth at each genome position (with one-based coordinates).\n\
Default behavior is to report a histogram.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--depth_zero"
alternatives:
- "-dz"
description: "Report the depth at each genome position (with zero-based coordinates).\n\
Reports only non-zero positions.\nDefault behavior is to report a histogram.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed_graph"
alternatives:
- "-bg"
description: "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--bed_graph_zero_coverage"
alternatives:
- "-bga"
description: "Report depth in BedGraph format, as above (-bg).\nHowever with this\
\ option, regions with zero \ncoverage are also reported. This allows one to\n\
quickly extract all regions of a genome with 0 \ncoverage by applying: \"grep\
\ -w 0$\" to the output.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--split"
description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals.\n\
when computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\"\
\ operations \nto infer the blocks for computing coverage.\nFor BED12 files,\
\ this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns\
\ 10,11,12).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--ignore_deletion"
alternatives:
- "-ignoreD"
description: "Ignore local deletions (CIGAR \"D\" operations) in BAM entries\n\
when computing coverage.\n"
info: null
direction: "input"
- type: "string"
name: "--strand"
description: "Calculate coverage of intervals from a specific strand.\nWith BED\
\ files, requires at least 6 columns (strand is column 6). \n"
info: null
required: false
choices:
- "+"
- "-"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--pair_end_coverage"
alternatives:
- "-pc"
description: "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n"
info: null
direction: "input"
# Flag argument --fragment_size (alias -fs); declared as a value-less boolean.
# NOTE(review): the description speaks of a "provided fragment size", but a
# boolean_true argument cannot carry a size value — confirm how script.sh
# passes -fs to bedtools and whether this should be an integer argument.
- type: "boolean_true"
name: "--fragment_size"
alternatives:
- "-fs"
description: "Force to use provided fragment size instead of read length\nWorks\
\ for BAM files only\n"
info: null
direction: "input"
# Flag argument --du; per its description it applies to BAM input only.
- type: "boolean_true"
  name: "--du"
  description: "Change strand of the mate read (so both reads from the same strand)\
    \ useful for strand specific\nWorks for BAM files only\n"
  info: null
  direction: "input"
- type: "boolean_true"
name: "--five_prime"
alternatives:
- "-5"
description: "Calculate coverage of 5\" positions (instead of entire interval).\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--three_prime"
alternatives:
- "-3"
description: "Calculate coverage of 3\" positions (instead of entire interval).\n"
info: null
direction: "input"
- type: "integer"
name: "--max"
description: "Combine all positions with a depth >= max into\na single bin in\
\ the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--scale"
description: "Scale the coverage by a constant factor.\nEach coverage value is\
\ multiplied by this factor before being reported.\nUseful for normalizing coverage\
\ by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n"
info: null
required: false
min: 0.0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--trackline"
description: "Adds a UCSC/Genome-Browser track line definition in the first line\
\ of the output.\n- See here for more details about track line definition:\n\
\ http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding\
\ a trackline definition, the output BedGraph can be easily\n uploaded\
\ to the Genome Browser as a custom track,\n BUT CAN NOT be converted into\
\ a BigWig file (w/o removing the first line).\n"
info: null
direction: "input"
- type: "string"
name: "--trackopts"
description: "Writes additional track line definition parameters in the first\
\ line.\n- Example:\n -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'\n\
\ Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Compute the coverage of a feature file among a genome.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "genome coverage"
- "BED"
- "GFF"
- "VCF"
- "BAM"
license: "MIT"
references:
doi:
- "10.1093/bioinformatics/btq033"
links:
repository: "https://github.com/arq5x/bedtools2"
homepage: "https://bedtools.readthedocs.io/en/latest/#"
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html"
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "bedtools"
- "procps"
interactive: false
- type: "docker"
run:
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
\ > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance recorded by viash for this component.
# The remote URL is stored WITHOUT credentials: the previously recorded value
# embedded an access token (x-access-token:ghs_...). That token must be treated
# as compromised and revoked; never publish authenticated remote URLs.
build_info:
  config: "src/bedtools/bedtools_genomecov/config.vsh.yaml"
  runner: "nextflow"
  engine: "docker|native"
  output: "target/nextflow/bedtools/bedtools_genomecov"
  executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf"
  viash_version: "0.9.0"
  git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
  git_remote: "https://github.com/viash-hub/biobox"
  git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "biobox"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -1,4 +1,4 @@
// rsem_calculate_expression main
// bedtools_genomecov main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -8,6 +8,9 @@
// authors of this component should specify the license in the header of such
// files, or include a separate license file detailing the licenses of all included
// files.
//
// Component authors:
// * Theodoro Gasperin Terra Camargo (author, maintainer)
////////////////////////////
// VDSL3 helper functions //
@@ -2804,60 +2807,46 @@ nextflow.enable.dsl=2
meta = [
"resources_dir": moduleDir.toRealPath().normalize(),
"config": processConfig(readJsonBlob('''{
"name" : "rsem_calculate_expression",
"namespace" : "rsem",
"name" : "bedtools_genomecov",
"namespace" : "bedtools",
"version" : "main",
"authors" : [
{
"name" : "Theodoro Gasperin Terra Camargo",
"roles" : [
"author",
"maintainer"
],
"info" : {
"links" : {
"email" : "theodorogtc@gmail.com",
"github" : "tgaspe",
"linkedin" : "theodoro-gasperin-terra-camargo"
},
"organizations" : [
{
"name" : "Data Intuitive",
"href" : "https://www.data-intuitive.com",
"role" : "Bioinformatician"
}
]
}
}
],
"argument_groups" : [
{
"name" : "Input",
"name" : "Inputs",
"arguments" : [
{
"type" : "string",
"name" : "--id",
"description" : "Sample ID.",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--strandedness",
"description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse",
"required" : false,
"choices" : [
"forward",
"reverse",
"unstranded"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean",
"name" : "--paired",
"description" : "Paired-end reads or not?",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--input",
"description" : "Input reads for quantification.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--index",
"description" : "RSEM index.",
"alternatives" : [
"-i"
],
"description" : "The input file (BED/GFF/VCF) to be used.\n",
"example" : [
"input.bed"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
@@ -2866,9 +2855,31 @@ meta = [
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--extra_args",
"description" : "Extra rsem-calculate-expression arguments in addition to the defaults.",
"type" : "file",
"name" : "--input_bam",
"alternatives" : [
"-ibam"
],
"description" : "The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n'--genome' option is ignored if you use '--input_bam' option!\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--genome",
"alternatives" : [
"-g"
],
"description" : "The genome file to be used.\n",
"example" : [
"genome.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
@@ -2877,104 +2888,166 @@ meta = [
]
},
{
"name" : "Output",
"name" : "Outputs",
"arguments" : [
{
"type" : "file",
"name" : "--counts_gene",
"description" : "Expression counts on gene level",
"name" : "--output",
"description" : "The output BED file. \n",
"example" : [
"sample.genes.results"
"output.bed"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "boolean_true",
"name" : "--depth",
"alternatives" : [
"-d"
],
"description" : "Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--depth_zero",
"alternatives" : [
"-dz"
],
"description" : "Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--bed_graph",
"alternatives" : [
"-bg"
],
"description" : "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--bed_graph_zero_coverage",
"alternatives" : [
"-bga"
],
"description" : "Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \\"grep -w 0$\\" to the output.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--split",
"description" : "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--ignore_deletion",
"alternatives" : [
"-ignoreD"
],
"description" : "Ignore local deletions (CIGAR \\"D\\" operations) in BAM entries\nwhen computing coverage.\n",
"direction" : "input"
},
{
"type" : "string",
"name" : "--strand",
"description" : "Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n",
"required" : false,
"choices" : [
"+",
"-"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--counts_transcripts",
"description" : "Expression counts on transcript level",
"example" : [
"sample.isoforms.results"
"type" : "boolean_true",
"name" : "--pair_end_coverage",
"alternatives" : [
"-pc"
],
"must_exist" : true,
"create_parent" : true,
"description" : "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--fragment_size",
"alternatives" : [
"-fs"
],
"description" : "Force to use provided fragment size instead of read length\nWorks for BAM files only\n",
"direction" : "input"
},
{
  "type" : "boolean_true",
  "name" : "--du",
  "description" : "Change strand of the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n",
  "direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--five_prime",
"alternatives" : [
"-5"
],
"description" : "Calculate coverage of 5\\" positions (instead of entire interval).\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--three_prime",
"alternatives" : [
"-3"
],
"description" : "Calculate coverage of 3\\" positions (instead of entire interval).\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--max",
"description" : "Combine all positions with a depth >= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n",
"required" : false,
"direction" : "output",
"min" : 0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--stat",
"description" : "RSEM statistics",
"example" : [
"sample.stat"
],
"must_exist" : true,
"create_parent" : true,
"type" : "double",
"name" : "--scale",
"description" : "Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n",
"required" : false,
"direction" : "output",
"min" : 0.0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--logs",
"description" : "RSEM logs",
"example" : [
"sample.log"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
"type" : "boolean_true",
"name" : "--trackline",
"description" : "Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n",
"direction" : "input"
},
{
"type" : "file",
"name" : "--bam_star",
"description" : "BAM file generated by STAR (optional)",
"example" : [
"sample.STAR.genome.bam"
],
"must_exist" : true,
"create_parent" : true,
"type" : "string",
"name" : "--trackopts",
"description" : "Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts 'name=\\"My Track\\" visibility=2 color=255,30,30'\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n",
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--bam_genome",
"description" : "Genome BAM file (optional)",
"example" : [
"sample.genome.bam"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--bam_transcript",
"description" : "Transcript BAM file (optional)",
"example" : [
"sample.transcript.bam"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
}
]
@@ -2987,7 +3060,7 @@ meta = [
"is_executable" : true
}
],
"description" : "Calculate expression with RSEM.\n",
"description" : "Compute the coverage of a feature file among a genome.\n",
"test_resources" : [
{
"type" : "bash_script",
@@ -2996,47 +3069,34 @@ meta = [
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
},
{
"type" : "file",
"path" : "/testData/minimal_test/reference/rsem.tar.gz"
"path" : "test_data"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/rsem/calculateexpression/main.nf",
"modules/nf-core/rsem/calculateexpression/meta.yml"
],
"last_sha" : "92b2a7857de1dda9d1c19a088941fc81e2976ff7"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
"keywords" : [
"genome coverage",
"BED",
"GFF",
"VCF",
"BAM"
],
"license" : "MIT",
"references" : {
"doi" : [
"10.1093/bioinformatics/btq033"
]
},
"links" : {
"repository" : "https://github.com/arq5x/bedtools2",
"homepage" : "https://bedtools.readthedocs.io/en/latest/#",
"documentation" : "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html",
"issue_tracker" : "https://github.com/arq5x/bedtools2/issues"
},
"runners" : [
{
"type" : "executable",
@@ -3115,7 +3175,7 @@ meta = [
{
"type" : "docker",
"id" : "docker",
"image" : "ubuntu:22.04",
"image" : "debian:stable-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "main",
"namespace_separator" : "/",
@@ -3123,31 +3183,15 @@ meta = [
{
"type" : "apt",
"packages" : [
"build-essential",
"gcc",
"g++",
"make",
"wget",
"zlib1g-dev",
"unzip",
"xxd",
"perl",
"r-base",
"bowtie2",
"python3-pip",
"git"
"bedtools",
"procps"
],
"interactive" : false
},
{
"type" : "docker",
"run" : [
"ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\nunzip ${STAR_VERSION}.zip && \\\\\ncd STAR-${STAR_VERSION}/source && \\\\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\ncp STAR /usr/local/bin && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \\\\\nunzip v${RSEM_VERSION}.zip && \\\\\ncd RSEM-${RSEM_VERSION} && \\\\\nmake && \\\\\nmake install && \\\\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\nrm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \\\\\ncd && \\\\\napt-get clean && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \\\\\n/bin/bash -c \\"source /etc/profile && source ~/.bashrc && echo $PATH && which STAR\\"\n"
],
"env" : [
"STAR_VERSION=2.7.11b",
"RSEM_VERSION=1.3.3",
"TZ=Europe/Brussels"
"echo \\"bedtools: \\\\\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\\\\"\\" > /var/software_versions.txt\n"
]
}
]
@@ -3158,49 +3202,39 @@ meta = [
}
],
"build_info" : {
"config" : "/workdir/root/repo/src/rsem/rsem_calculate_expression/config.vsh.yaml",
"config" : "/workdir/root/repo/src/bedtools/bedtools_genomecov/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/rsem/rsem_calculate_expression",
"output" : "target/nextflow/bedtools/bedtools_genomecov",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3214,21 +3248,30 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi )
$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi )
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi )
$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi )
$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi )
$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi )
$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi )
$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi )
$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi )
$( if [ ! -z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_BAM+x} ]; then echo "${VIASH_PAR_INPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_bam='&'#" ; else echo "# par_input_bam="; fi )
$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_DEPTH+x} ]; then echo "${VIASH_PAR_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth='&'#" ; else echo "# par_depth="; fi )
$( if [ ! -z ${VIASH_PAR_DEPTH_ZERO+x} ]; then echo "${VIASH_PAR_DEPTH_ZERO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth_zero='&'#" ; else echo "# par_depth_zero="; fi )
$( if [ ! -z ${VIASH_PAR_BED_GRAPH+x} ]; then echo "${VIASH_PAR_BED_GRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph='&'#" ; else echo "# par_bed_graph="; fi )
$( if [ ! -z ${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE+x} ]; then echo "${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph_zero_coverage='&'#" ; else echo "# par_bed_graph_zero_coverage="; fi )
$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi )
$( if [ ! -z ${VIASH_PAR_IGNORE_DELETION+x} ]; then echo "${VIASH_PAR_IGNORE_DELETION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_deletion='&'#" ; else echo "# par_ignore_deletion="; fi )
$( if [ ! -z ${VIASH_PAR_STRAND+x} ]; then echo "${VIASH_PAR_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strand='&'#" ; else echo "# par_strand="; fi )
$( if [ ! -z ${VIASH_PAR_PAIR_END_COVERAGE+x} ]; then echo "${VIASH_PAR_PAIR_END_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_end_coverage='&'#" ; else echo "# par_pair_end_coverage="; fi )
$( if [ ! -z ${VIASH_PAR_FRAGMENT_SIZE+x} ]; then echo "${VIASH_PAR_FRAGMENT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_size='&'#" ; else echo "# par_fragment_size="; fi )
$( if [ ! -z ${VIASH_PAR_DU+x} ]; then echo "${VIASH_PAR_DU}" | sed "s#'#'\\"'\\"'#g;s#.*#par_du='&'#" ; else echo "# par_du="; fi )
$( if [ ! -z ${VIASH_PAR_FIVE_PRIME+x} ]; then echo "${VIASH_PAR_FIVE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_five_prime='&'#" ; else echo "# par_five_prime="; fi )
$( if [ ! -z ${VIASH_PAR_THREE_PRIME+x} ]; then echo "${VIASH_PAR_THREE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime='&'#" ; else echo "# par_three_prime="; fi )
$( if [ ! -z ${VIASH_PAR_MAX+x} ]; then echo "${VIASH_PAR_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max='&'#" ; else echo "# par_max="; fi )
$( if [ ! -z ${VIASH_PAR_SCALE+x} ]; then echo "${VIASH_PAR_SCALE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scale='&'#" ; else echo "# par_scale="; fi )
$( if [ ! -z ${VIASH_PAR_TRACKLINE+x} ]; then echo "${VIASH_PAR_TRACKLINE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackline='&'#" ; else echo "# par_trackline="; fi )
$( if [ ! -z ${VIASH_PAR_TRACKOPTS+x} ]; then echo "${VIASH_PAR_TRACKOPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackopts='&'#" ; else echo "# par_trackopts="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3249,47 +3292,57 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
# Exit on error
set -eo pipefail
function clean_up {
rm -rf "\\$tmpdir"
}
trap clean_up EXIT
# Unset variables
unset_if_false=(
par_input_bam
par_depth
par_depth_zero
par_bed_graph
par_bed_graph_zero_coverage
par_split
par_ignore_deletion
par_pair_end_coverage
par_fragment_size
par_du
par_five_prime
par_three_prime
par_trackline
)
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX")
for par in \\${unset_if_false[@]}; do
test_val="\\${!par}"
[[ "\\$test_val" == "false" ]] && unset \\$par
done
[[ "\\$par_paired" == "false" ]] && unset par_paired
# Create input array
IFS=";" read -ra trackopts <<< \\$par_trackopts
if [ \\$par_strandedness == 'forward' ]; then
strandedness='--strandedness forward'
elif [ \\$par_strandedness == 'reverse' ]; then
strandedness='--strandedness reverse'
else
strandedness=''
fi
bedtools genomecov \\\\
\\${par_depth:+-d} \\\\
\\${par_depth_zero:+-dz} \\\\
\\${par_bed_graph:+-bg} \\\\
\\${par_bed_graph_zero_coverage:+-bga} \\\\
\\${par_split:+-split} \\\\
\\${par_ignore_deletion:+-ignoreD} \\\\
\\${par_du:+-du} \\\\
\\${par_five_prime:+-5} \\\\
\\${par_three_prime:+-3} \\\\
\\${par_trackline:+-trackline} \\\\
\\${par_strand:+-strand "\\$par_strand"} \\\\
\\${par_max:+-max "\\$par_max"} \\\\
\\${par_scale:+-scale "\\$par_scale"} \\\\
\\${par_trackopts:+-trackopts "\\${trackopts[*]}"} \\\\
\\${par_input_bam:+-ibam "\\$par_input_bam"} \\\\
\\${par_input:+-i "\\$par_input"} \\\\
\\${par_genome:+-g "\\$par_genome"} \\\\
\\${par_pair_end_coverage:+-pc} \\\\
\\${par_fragment_size:+-fs} \\\\
> "\\$par_output"
IFS=";" read -ra input <<< \\$par_input
INDEX=\\`find -L \\$par_index/ -name "*.grp" | sed 's/\\\\.grp\\$//'\\`
rsem-calculate-expression \\\\
\\${meta_cpus:+--num-threads \\$meta_cpus} \\\\
\\$strandedness \\\\
\\${par_paired:+--paired-end} \\\\
\\$par_extra_args \\\\
\\${input[*]} \\\\
\\$INDEX \\\\
\\$par_id
[[ -e "\\${par_id}.genes.results" ]] && mv "\\${par_id}.genes.results" \\$par_counts_gene
[[ -e "\\${par_id}id.isoforms.results" ]] && mv "\\${par_id}id.isoforms.results" \\$par_counts_transcripts
[[ -e "\\${par_id}.stat" ]] && mv "\\${par_id}.stat" \\$par_stat
# [[ -e "\\${par_id}.log" ]] && mv "\\${par_id}.log" \\$par_logs
[[ -e "\\${par_id}.STAR.genome.bam" ]] && mv "\\${par_id}.STAR.genome.bam" \\$par_bam_star
[[ -e "\\${par_id}.genome.bam" ]] && mv "\\${par_id}.genome.bam" \\$par_bam_genome
[[ -e "\\${par_id}.transcript.bam" ]] && mv "\\${par_id}.transcript.bam" \\$par_bam_transcript
VIASHMAIN
bash "$tempscript"
'''
@@ -3650,7 +3703,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/rsem/rsem_calculate_expression",
"image" : "vsh/biobox/bedtools/bedtools_genomecov",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -0,0 +1,126 @@
// Pipeline metadata for the bedtools_genomecov Nextflow module.
manifest {
  // Identity of the module.
  name        = 'bedtools/bedtools_genomecov'
  version     = 'main'
  author      = 'Theodoro Gasperin Terra Camargo'
  description = 'Compute the coverage of a feature file among a genome.\n'

  // Entry script and the Nextflow version constraint for running it.
  mainScript      = 'main.nf'
  nextflowVersion = '!>=20.12.1-edge'
}
// Container image set on the top-level process scope.
process.container = 'nextflow/bash:latest'
// Detect the temporary directory: take the first of NXF_TEMP, VIASH_TEMP,
// TEMPDIR and TMPDIR whose value is truthy (set and non-empty), falling back
// to '/tmp', and store it as an absolute path. The order of the chain below
// is the lookup priority.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles.
profiles {
  // Disable publishDir for every process (selector '.*' matches all names).
  no_publish {
    process {
      withName: '.*' {
        publishDir = [enabled: false]
      }
    }
  }

  // Set the temp option of the container engines to the detected tempDir.
  mount_temp {
    docker.temp       = tempDir
    podman.temp       = tempDir
    charliecloud.temp = tempDir
  }

  // The five profiles below each enable exactly one container engine and
  // explicitly disable the other four.
  docker {
    singularity.enabled  = false
    podman.enabled       = false
    shifter.enabled      = false
    charliecloud.enabled = false
    docker.enabled       = true
    // docker.userEmulation = true
  }

  singularity {
    docker.enabled         = false
    podman.enabled         = false
    shifter.enabled        = false
    charliecloud.enabled   = false
    singularity.enabled    = true
    singularity.autoMounts = true
  }

  podman {
    docker.enabled       = false
    singularity.enabled  = false
    shifter.enabled      = false
    charliecloud.enabled = false
    podman.enabled       = true
  }

  shifter {
    docker.enabled       = false
    singularity.enabled  = false
    podman.enabled       = false
    charliecloud.enabled = false
    shifter.enabled      = true
  }

  charliecloud {
    docker.enabled       = false
    singularity.enabled  = false
    podman.enabled       = false
    shifter.enabled      = false
    charliecloud.enabled = true
  }
}
// Resource presets: a process carrying one of these labels receives the
// corresponding memory or cpus directive.
process {
  // Memory, decimal units: mem<N>gb = N * 10^9 bytes.
  withLabel: mem1gb   { memory = 1000000000.B }
  withLabel: mem2gb   { memory = 2000000000.B }
  withLabel: mem5gb   { memory = 5000000000.B }
  withLabel: mem10gb  { memory = 10000000000.B }
  withLabel: mem20gb  { memory = 20000000000.B }
  withLabel: mem50gb  { memory = 50000000000.B }
  withLabel: mem100gb { memory = 100000000000.B }
  withLabel: mem200gb { memory = 200000000000.B }
  withLabel: mem500gb { memory = 500000000000.B }

  // Memory, decimal units: mem<N>tb = N * 10^12 bytes.
  withLabel: mem1tb   { memory = 1000000000000.B }
  withLabel: mem2tb   { memory = 2000000000000.B }
  withLabel: mem5tb   { memory = 5000000000000.B }
  withLabel: mem10tb  { memory = 10000000000000.B }
  withLabel: mem20tb  { memory = 20000000000000.B }
  withLabel: mem50tb  { memory = 50000000000000.B }
  withLabel: mem100tb { memory = 100000000000000.B }
  withLabel: mem200tb { memory = 200000000000000.B }
  withLabel: mem500tb { memory = 500000000000000.B }

  // Memory, binary units: mem<N>gib = N * 2^30 bytes.
  withLabel: mem1gib   { memory = 1073741824.B }
  withLabel: mem2gib   { memory = 2147483648.B }
  withLabel: mem4gib   { memory = 4294967296.B }
  withLabel: mem8gib   { memory = 8589934592.B }
  withLabel: mem16gib  { memory = 17179869184.B }
  withLabel: mem32gib  { memory = 34359738368.B }
  withLabel: mem64gib  { memory = 68719476736.B }
  withLabel: mem128gib { memory = 137438953472.B }
  withLabel: mem256gib { memory = 274877906944.B }
  withLabel: mem512gib { memory = 549755813888.B }

  // Memory, binary units: mem<N>tib = N * 2^40 bytes.
  withLabel: mem1tib   { memory = 1099511627776.B }
  withLabel: mem2tib   { memory = 2199023255552.B }
  withLabel: mem4tib   { memory = 4398046511104.B }
  withLabel: mem8tib   { memory = 8796093022208.B }
  withLabel: mem16tib  { memory = 17592186044416.B }
  withLabel: mem32tib  { memory = 35184372088832.B }
  withLabel: mem64tib  { memory = 70368744177664.B }
  withLabel: mem128tib { memory = 140737488355328.B }
  withLabel: mem256tib { memory = 281474976710656.B }
  withLabel: mem512tib { memory = 562949953421312.B }

  // CPU counts: cpu<N> = N cpus.
  withLabel: cpu1    { cpus = 1 }
  withLabel: cpu2    { cpus = 2 }
  withLabel: cpu5    { cpus = 5 }
  withLabel: cpu10   { cpus = 10 }
  withLabel: cpu20   { cpus = 20 }
  withLabel: cpu50   { cpus = 50 }
  withLabel: cpu100  { cpus = 100 }
  withLabel: cpu200  { cpus = 200 }
  withLabel: cpu500  { cpus = 500 }
  withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,303 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "bedtools_genomecov",
"description": "Compute the coverage of a feature file among a genome.\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used",
"help_text": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used.\n"
}
,
"input_bam": {
"type":
"string",
"description": "Type: `file`. The input file is in BAM format",
"help_text": "Type: `file`. The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n\u0027--genome\u0027 option is ignored if you use \u0027--input_bam\u0027 option!\n"
}
,
"genome": {
"type":
"string",
"description": "Type: `file`, example: `genome.txt`. The genome file to be used",
"help_text": "Type: `file`, example: `genome.txt`. The genome file to be used.\n"
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file",
"help_text": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file. \n"
,
"default": "$id.$key.output.bed"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"depth": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates)",
"help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n"
,
"default": "False"
}
,
"depth_zero": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates)",
"help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n"
,
"default": "False"
}
,
"bed_graph": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format",
"help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n"
,
"default": "False"
}
,
"bed_graph_zero_coverage": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg)",
"help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \"grep -w 0$\" to the output.\n"
,
"default": "False"
}
,
"split": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals when computing coverage",
"help_text": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n"
,
"default": "False"
}
,
"ignore_deletion": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage",
"help_text": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage.\n"
,
"default": "False"
}
,
"strand": {
"type":
"string",
"description": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand",
"help_text": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n",
"enum": ["+", "-"]
}
,
"pair_end_coverage": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments",
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments.\nWorks for BAM files only\n"
,
"default": "False"
}
,
"fragment_size": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n",
"help_text": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n"
,
"default": "False"
}
,
"du": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Change strand of the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n",
"help_text": "Type: `boolean_true`, default: `false`. Change strand of the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n"
,
"default": "False"
}
,
"five_prime": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\u0027 positions (instead of entire interval)",
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\u0027 positions (instead of entire interval).\n"
,
"default": "False"
}
,
"three_prime": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\u0027 positions (instead of entire interval)",
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\u0027 positions (instead of entire interval).\n"
,
"default": "False"
}
,
"max": {
"type":
"integer",
"description": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram",
"help_text": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n"
}
,
"scale": {
"type":
"number",
"description": "Type: `double`. Scale the coverage by a constant factor",
"help_text": "Type: `double`. Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n"
}
,
"trackline": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output",
"help_text": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n"
,
"default": "False"
}
,
"trackopts": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line",
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts \u0027name=\"My Track\" visibility=2 color=255,30,30\u0027\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,367 @@
# Component identity: the `fastqc` component at version "main".
name: "fastqc"
version: "main"
# Component authors, each with roles, contact links and organisational affiliation.
authors:
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "FASTQ file(s) to be analyzed.\n"
info: null
example:
- "input.fq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Outputs"
description: "At least one of the output options (--html, --zip, --summary, --data)\
\ must be used.\n"
arguments:
- type: "file"
name: "--html"
description: "Create the HTML report of the results. \n'*' wild card must be provided\
\ in the output file name. \nWild card will be replaced by the input file basename.\n\
e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\
\ html file named sample_1.html\n"
info: null
example:
- "*.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
# Optional output argument for the zip archive(s). The value must contain a '*'
# wildcard, which is replaced by the input basename (see the description).
# The usage example now names this flag (--zip) instead of --html.
- type: "file"
name: "--zip"
description: "Create the zip file(s) containing: html report, data, images, icons,\
\ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
\ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\
\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
info: null
example:
- "*.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Create the summary file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\
\ an output summary.txt file named sample_1_summary.txt\n"
info: null
example:
- "*_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
# Optional output argument for the data file(s). The value must contain a '*'
# wildcard, which is replaced by the input basename (see the description).
# The usage example now names this flag (--data) instead of --summary.
- type: "file"
name: "--data"
description: "Create the data file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an\
\ output data.txt file named sample_1_data.txt\n"
info: null
example:
- "*_data.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--casava"
description: "Files come from raw casava output. Files in the same sample\ngroup\
\ (differing only by the group number) will be analysed\nas a set rather than\
\ individually. Sequences with the filter\nflag set in the header will be excluded\
\ from the analysis.\nFiles must have the same names given to them by casava\n\
(including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
\ together correctly.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nano"
description: "Files come from nanopore sequences and are in fast5 format. In\n\
this mode you can pass in directories to process and the program\nwill take\
\ in all fast5 files within those directories and produce\na single output file\
\ from the sequences found in all files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nofilter"
description: "If running with --casava then don't remove read flagged by\ncasava\
\ as poor quality when performing the QC analysis.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in the read. \nWARNING: Using this option will cause fastqc\
\ to crash \nand burn if you use it on really long reads, and your \nplots may\
\ end up a ridiculous size. You have been warned!\n"
info: null
direction: "input"
- type: "integer"
name: "--min_length"
description: "Sets an artificial lower limit on the length of the \nsequence to\
\ be shown in the report. As long as you \nset this to a value greater or equal\
\ to your longest \nread length then this will be the sequence length used \n\
to create your read groups. This can be useful for making\ndirectly comparable\
\ statistics from datasets with somewhat \nvariable read lengths.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format detection and \nforces\
\ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
\ sam_mapped, and fastq.\n"
info: null
example:
- "bam"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--contaminants"
alternatives:
- "-c"
description: "Specifies a non-default file which contains the list \nof contaminants\
\ to screen overrepresented sequences against. \nThe file must contain sets\
\ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
\ a hash will be ignored.\n"
info: null
example:
- "contaminants.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--adapters"
alternatives:
- "-a"
description: "Specifies a non-default file which contains the list of \nadapter\
\ sequences which will be explicitly searched against \nthe library. The file\
\ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
\ prefixed with a hash will be ignored.\n"
info: null
example:
- "adapters.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--limits"
alternatives:
- "-l"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to determine \nthe warn/error limits for the various modules.\
\ \nThis file can also be used to selectively remove \nsome modules from the\
\ output altogether. The format \nneeds to mirror the default limits.txt file\
\ found in \nthe Configuration folder.\n"
info: null
example:
- "limits.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--kmers"
alternatives:
- "-k"
description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
\ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
\ specified.\n"
info: null
example:
- 7
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress all progress messages on stdout and only report errors.\n"
info: null
direction: "input"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "FastQC - A high throughput sequence QC analysis tool."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Quality control"
- "BAM"
- "SAM"
- "FASTQ"
license: "GPL-3.0, Apache-2.0"
links:
repository: "https://github.com/s-andrews/FastQC"
homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
issue_tracker: "https://github.com/s-andrews/FastQC/issues"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
# Execution engines: a Docker engine built on the biocontainers FastQC image,
# plus a native engine.
engines:
- type: "docker"
id: "docker"
image: "biocontainers/fastqc:v0.11.9_cv8"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
# Image setup: write the version reported by `fastqc --version` (with the
# leading "FastQC " stripped by sed) to /var/software_versions.txt.
setup:
- type: "docker"
run:
- "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance: source config, runner/engine, output locations and the git
# state the component was built from.
# SECURITY: git_remote must not carry credentials. The access token that was
# embedded in the URL's userinfo has been removed; the leaked token should be
# treated as compromised and revoked.
build_info:
config: "src/fastqc/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/fastqc"
executable: "target/nextflow/fastqc/main.nf"
viash_version: "0.9.0"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "biobox"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
// Pipeline manifest for the fastqc component, written as dotted assignments
// (equivalent to the block form of the `manifest` scope).
manifest.name = 'fastqc'
manifest.mainScript = 'main.nf'
// The leading '!' makes the Nextflow version requirement mandatory.
manifest.nextflowVersion = '!>=20.12.1-edge'
manifest.version = 'main'
manifest.description = 'FastQC - A high throughput sequence QC analysis tool.'
manifest.author = 'Theodoro Gasperin Terra Camargo'
// Default container image applied through the `process` scope.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Candidates are tried in order: NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR, then
// the literal '/tmp'. The Elvis operator skips a variable that is unset or
// empty. The chosen path is converted to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles.
profiles {
// no_publish: disable publishDir for every process (selector '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: point the docker, podman and charliecloud temp settings at the
// temp directory detected above (tempDir).
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Container-engine profiles: each one enables exactly one engine and
// explicitly disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// singularity additionally turns on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource labels: each `withLabel` selector sets the memory or cpus directive
// for processes carrying that label. Memory values are spelled out in bytes (.B).
process{
// Decimal memory sizes (powers of ten): memNgb = N * 10^9 bytes, memNtb = N * 10^12 bytes.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory sizes (powers of two): memNgib = N * 2^30 bytes, memNtib = N * 2^40 bytes.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU counts.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,257 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "fastqc",
"description": "FastQC - A high throughput sequence QC analysis tool.",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed",
"help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n"
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n",
"properties": {
"html": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results",
"help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output html file named sample_1.html\n"
,
"default": "$id.$key.html_*.html"
}
,
"zip": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc",
"help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
,
"default": "$id.$key.zip_*.zip"
}
,
"summary": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)",
"help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create an output summary.txt file named sample_1_summary.txt\n"
,
"default": "$id.$key.summary_*.txt"
}
,
"data": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)",
"help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an output data.txt file named sample_1_data.txt\n"
,
"default": "$id.$key.data_*.txt"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"casava": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Files come from raw casava output",
"help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n"
,
"default": false
}
,
"nano": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format",
"help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n"
,
"default": false
}
,
"nofilter": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis",
"help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis.\n"
,
"default": false
}
,
"nogroup": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp",
"help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n"
,
"default": false
}
,
"min_length": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report",
"help_text": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n"
}
,
"format": {
"type":
"string",
"description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format",
"help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n"
}
,
"contaminants": {
"type":
"string",
"description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against",
"help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n"
}
,
"adapters": {
"type":
"string",
"description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library",
"help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n"
}
,
"limits": {
"type":
"string",
"description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules",
"help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n"
}
,
"kmers": {
"type":
"integer",
"description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module",
"help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n"
}
,
"quiet": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors",
"help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n"
,
"default": false
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -1,22 +1,23 @@
name: "umitools_dedup"
namespace: "umitools"
name: "fq_subsample"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "boolean"
name: "--paired"
description: "Paired fastq files or not?"
- type: "file"
name: "--input_1"
description: "First input fastq file to subsample. Accepts both raw and gzipped\
\ FASTQ inputs."
info: null
default:
- false
required: false
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam"
description: "Input BAM file"
name: "--input_2"
description: "Second input fastq files to subsample. Accepts both raw and gzipped\
\ FASTQ inputs."
info: null
must_exist: true
create_parent: true
@@ -24,32 +25,12 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bai"
description: "BAM index"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--get_output_stats"
description: "Whether or not to generate output stats."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output_bam"
description: "Deduplicated BAM file"
name: "--output_1"
description: "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`."
info: null
default:
- "$id.$key.bam"
must_exist: true
create_parent: true
required: false
@@ -57,51 +38,68 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_stats"
description: "Directory containing UMI based dedupllication statistics files"
name: "--output_2"
description: "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`."
info: null
default:
- "$id.umi_dedup.stats"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "double"
name: "--probability"
description: "The probability a record is kept, as a percentage (0.0, 1.0). Cannot\
\ be used with `record-count`"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--record_count"
description: "The exact number of records to keep. Cannot be used with `probability`"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Seed to use for the random number generator"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\
\ to the read.\n"
description: "fq subsample outputs a subset of records from single or paired FASTQ\
\ files."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "chr19.bam"
- type: "file"
path: "chr19.bam.bai"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/umitools/dedup/main.nf"
- "modules/nf-core/umitools/dedup/meta.yml"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
keywords:
- "fastq"
- "subsample"
- "subset"
license: "MIT"
links:
repository: "https://github.com/stjude-rust-labs/fq"
homepage: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
documentation: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
runners:
- type: "executable"
id: "executable"
@@ -170,56 +168,50 @@ runners:
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
image: "rust:1.81-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "pip"
interactive: false
- type: "python"
user: false
packages:
- "umi_tools"
upgrade: true
- type: "docker"
run:
- "apt-get update && apt-get install -y git procps && \\\ngit clone --depth 1\
\ --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\ncd fq &&\
\ \\\ncargo install --locked --path . && \\\nmv target/release/fq /usr/local/bin/\
\ && \\\ncd / && rm -rf /fq\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/umitools/umitools_dedup/config.vsh.yaml"
config: "src/fq_subsample/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/umitools/umitools_dedup"
executable: "target/nextflow/umitools/umitools_dedup/main.nf"
output: "target/nextflow/fq_subsample"
executable: "target/nextflow/fq_subsample/main.nf"
viash_version: "0.9.0"
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
# SECURITY: access tokens removed from the remote URLs below; treat the leaked
# tokens as compromised and revoke them.
git_remote: "https://github.com/viash-hub/rnaseq"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "rnaseq"
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -2812,22 +2812,21 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--input",
"description" : "Input fastq files to subsample",
"name" : "--input_1",
"description" : "First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"required" : true,
"direction" : "input",
"multiple" : true,
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--extra_args",
"description" : "Extra arguments to pass to fq subsample",
"default" : [
""
],
"type" : "file",
"name" : "--input_2",
"description" : "Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
@@ -2836,15 +2835,12 @@ meta = [
]
},
{
"name" : "Input",
"name" : "Output",
"arguments" : [
{
"type" : "file",
"name" : "--output_1",
"description" : "Sampled read 1 fastq files",
"default" : [
"$id.read_1.subsampled.fastq"
],
"description" : "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
@@ -2855,11 +2851,8 @@ meta = [
{
"type" : "file",
"name" : "--output_2",
"description" : "Sampled read 2 fastq files",
"default" : [
"$id.read_2.subsampled.fastq"
],
"must_exist" : false,
"description" : "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
@@ -2867,6 +2860,38 @@ meta = [
"multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "double",
"name" : "--probability",
"description" : "The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--record_count",
"description" : "The exact number of records to keep. Cannot be used with `probability`",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Seed to use for the random number generator",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
],
"resources" : [
@@ -2876,7 +2901,7 @@ meta = [
"is_executable" : true
}
],
"description" : "fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n",
"description" : "fq subsample outputs a subset of records from single or paired FASTQ files.",
"test_resources" : [
{
"type" : "bash_script",
@@ -2885,43 +2910,26 @@ meta = [
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
"path" : "test_data"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/fq/subsample/main.nf",
"modules/nf-core/fq/subsample/meta.yml"
],
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
"keywords" : [
"fastq",
"subsample",
"subset"
],
"license" : "MIT",
"links" : {
"repository" : "https://github.com/stjude-rust-labs/fq",
"homepage" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md",
"documentation" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
},
"runners" : [
{
"type" : "executable",
@@ -3000,7 +3008,7 @@ meta = [
{
"type" : "docker",
"id" : "docker",
"image" : "ubuntu:22.04",
"image" : "rust:1.81-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "main",
"namespace_separator" : "/",
@@ -3008,10 +3016,7 @@ meta = [
{
"type" : "docker",
"run" : [
"ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\napt-get update && \\\\\napt-get install -y --no-install-recommends build-essential git-all curl && \\\\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\\\n. \\"$HOME/.cargo/env\\" && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\nmv fq /usr/local/ && cd /usr/local/fq && \\\\\ncargo install --locked --path . && \\\\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n"
],
"env" : [
"TZ=Europe/Brussels"
"apt-get update && apt-get install -y git procps && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\ncd fq && \\\\\ncargo install --locked --path . && \\\\\nmv target/release/fq /usr/local/bin/ && \\\\\ncd / && rm -rf /fq\n"
]
}
]
@@ -3025,46 +3030,36 @@ meta = [
"config" : "/workdir/root/repo/src/fq_subsample/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/fq_subsample",
"output" : "target/nextflow/fq_subsample",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3078,12 +3073,17 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_1+x} ]; then echo "${VIASH_PAR_INPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_1='&'#" ; else echo "# par_input_1="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_2+x} ]; then echo "${VIASH_PAR_INPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_2='&'#" ; else echo "# par_input_2="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi )
$( if [ ! -z ${VIASH_PAR_PROBABILITY+x} ]; then echo "${VIASH_PAR_PROBABILITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_probability='&'#" ; else echo "# par_probability="; fi )
$( if [ ! -z ${VIASH_PAR_RECORD_COUNT+x} ]; then echo "${VIASH_PAR_RECORD_COUNT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_record_count='&'#" ; else echo "# par_record_count="; fi )
$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3104,29 +3104,27 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
set -eo pipefail
IFS=";" read -ra input <<< \\$par_input
n_fastq=\\${#input[@]}
required_args=("-p" "--probability" "-n" "--read-count")
for arg in "\\${required_args[@]}"; do
if [[ "\\$par_extra_args" == *"\\$arg"* ]]; then
echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args"
exit 1
fi
done
required_args=("-p" "--probability" "-n" "--record_count")
if [ \\$n_fastq -eq 1 ]; then
fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1
elif [ \\$n_fastq -eq 2 ]; then
fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1 --r2-dst \\$par_output_2
else
echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!"
# exclusive OR for required arguments \\$par_probability and \\$par_record_count
if [[ -n \\$par_probability && -n \\$par_record_count ]] || [[ -z \\$par_probability && -z \\$par_record_count ]]; then
echo "FQ/SUBSAMPLE requires either --probability or --record_count to be specified"
exit 1
fi
fq subsample \\\\
\\${par_output_1:+--r1-dst "\\${par_output_1}"} \\\\
\\${par_output_2:+--r2-dst "\\${par_output_2}"} \\\\
\\${par_probability:+--probability "\\${par_probability}"} \\\\
\\${par_record_count:+--record-count "\\${par_record_count}"} \\\\
\\${par_seed:+--seed "\\${par_seed}"} \\\\
\\${par_input_1} \\\\
\\${par_input_2}
VIASHMAIN
bash "$tempscript"
'''
@@ -3487,7 +3485,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/fq_subsample",
"image" : "vsh/biobox/fq_subsample",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -3,7 +3,7 @@ manifest {
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n'
description = 'fq subsample outputs a subset of records from single or paired FASTQ files.'
}
process.container = 'nextflow/bash:latest'

View File

@@ -1,7 +1,7 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "umitools_dedup",
"description": "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n",
"title": "fq_subsample",
"description": "fq subsample outputs a subset of records from single or paired FASTQ files.",
"type": "object",
"definitions": {
@@ -14,42 +14,21 @@
"properties": {
"paired": {
"type":
"boolean",
"description": "Type: `boolean`, default: `false`. Paired fastq files or not?",
"help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?"
,
"default":false
}
,
"bam": {
"input_1": {
"type":
"string",
"description": "Type: `file`. Input BAM file",
"help_text": "Type: `file`. Input BAM file"
"description": "Type: `file`, required. First input fastq file to subsample",
"help_text": "Type: `file`, required. First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs."
}
,
"bai": {
"input_2": {
"type":
"string",
"description": "Type: `file`. BAM index",
"help_text": "Type: `file`. BAM index"
}
,
"get_output_stats": {
"type":
"boolean",
"description": "Type: `boolean`. Whether or not to generate output stats",
"help_text": "Type: `boolean`. Whether or not to generate output stats."
"description": "Type: `file`. Second input fastq file to subsample",
"help_text": "Type: `file`. Second input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs."
}
@@ -65,24 +44,64 @@
"properties": {
"output_bam": {
"output_1": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file",
"help_text": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file"
"description": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files",
"help_text": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`."
,
"default":"$id.$key.output_bam.bam"
"default": "$id.$key.output_1.output_1"
}
,
"output_stats": {
"output_2": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files",
"help_text": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files"
"description": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files",
"help_text": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`."
,
"default":"$id.$key.output_stats.stats"
"default": "$id.$key.output_2.output_2"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"probability": {
"type":
"number",
"description": "Type: `double`. The probability a record is kept, as a percentage (0",
"help_text": "Type: `double`. The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`"
}
,
"record_count": {
"type":
"integer",
"description": "Type: `integer`. The exact number of records to keep",
"help_text": "Type: `integer`. The exact number of records to keep. Cannot be used with `probability`"
}
,
"seed": {
"type":
"integer",
"description": "Type: `integer`. Seed to use for the random number generator",
"help_text": "Type: `integer`. Seed to use for the random number generator"
}
@@ -130,6 +149,10 @@
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}

View File

@@ -5,18 +5,22 @@ argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--transcriptome_fasta"
name: "--input"
description: "Path to a FASTA-file containing the transcriptome sequences, either\
\ in plain text or \ncompressed (.gz) format.\n"
info: null
must_exist: true
create_parent: true
required: false
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--pseudo_aligner_kmer_size"
description: "Kmer length passed to indexing step of pseudoaligners."
- type: "file"
name: "--d_list"
description: "Path to a FASTA-file containing sequences to mask from quantification.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
@@ -24,9 +28,9 @@ argument_groups:
- name: "Output"
arguments:
- type: "file"
name: "--kallisto_index"
name: "--index"
info: null
default:
example:
- "Kallisto_index"
must_exist: true
create_parent: true
@@ -34,37 +38,96 @@ argument_groups:
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--kmer_size"
description: "Kmer length passed to indexing step of pseudoaligners (default:\
\ '31').\n"
info: null
example:
- 31
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--make_unique"
description: "Replace repeated target names with unique names.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--aa"
description: "Generate index from a FASTA-file containing amino acid sequences.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--distiguish"
description: "Generate index where sequences are distinguished by the sequence\
\ names.\n"
info: null
direction: "input"
- type: "integer"
name: "--min_size"
alternatives:
- "-m"
description: "Length of minimizers (default: automatically chosen).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--ec_max_size"
alternatives:
- "-e"
description: "Maximum number of targets in an equivalence class (default: no maximum).\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--tmp"
alternatives:
- "-T"
description: "Path to a directory for temporary files.\n"
info: null
example:
- "tmp"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Create Kallisto index.\n"
description: "Build a Kallisto index for the transcriptome to use Kallisto in the\
\ mapping-based mode.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "transcriptome.fasta"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/kallisto/index/main.nf"
- "modules/nf-core/kallisto/index/meta.yml"
last_sha: "c0816976384d5e7ee6079c29c45958df1ffa0ee4"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
keywords:
- "kallisto"
- "index"
license: "BSD 2-Clause License"
references:
doi:
- "https://doi.org/10.1038/nbt.3519"
links:
repository: "https://github.com/pachterlab/kallisto"
homepage: "https://pachterlab.github.io/kallisto/about"
documentation: "https://pachterlab.github.io/kallisto/manual"
issue_tracker: "https://github.com/pachterlab/kallisto/issues"
runners:
- type: "executable"
id: "executable"
@@ -155,31 +218,28 @@ build_info:
output: "target/nextflow/kallisto/kallisto_index"
executable: "target/nextflow/kallisto/kallisto_index/main.nf"
viash_version: "0.9.0"
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
git_remote: "https://github.com/viash-hub/rnaseq"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "rnaseq"
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -2813,18 +2813,21 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--transcriptome_fasta",
"name" : "--input",
"description" : "Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"required" : true,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--pseudo_aligner_kmer_size",
"description" : "Kmer length passed to indexing step of pseudoaligners.",
"type" : "file",
"name" : "--d_list",
"description" : "Path to a FASTA-file containing sequences to mask from quantification.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
@@ -2837,8 +2840,8 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--kallisto_index",
"default" : [
"name" : "--index",
"example" : [
"Kallisto_index"
],
"must_exist" : true,
@@ -2849,6 +2852,80 @@ meta = [
"multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "integer",
"name" : "--kmer_size",
"description" : "Kmer length passed to indexing step of pseudoaligners (default: '31').\n",
"example" : [
31
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--make_unique",
"description" : "Replace repeated target names with unique names.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--aa",
"description" : "Generate index from a FASTA-file containing amino acid sequences.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--distiguish",
"description" : "Generate index where sequences are distinguished by the sequence names.\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--min_size",
"alternatives" : [
"-m"
],
"description" : "Length of minimizers (default: automatically chosen).\n",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--ec_max_size",
"alternatives" : [
"-e"
],
"description" : "Maximum number of targets in an equivalence class (default: no maximum).\n",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--tmp",
"alternatives" : [
"-T"
],
"description" : "Path to a directory for temporary files.\n",
"example" : [
"tmp"
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
],
"resources" : [
@@ -2858,7 +2935,7 @@ meta = [
"is_executable" : true
}
],
"description" : "Create Kallisto index.\n",
"description" : "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n",
"test_resources" : [
{
"type" : "bash_script",
@@ -2867,39 +2944,31 @@ meta = [
},
{
"type" : "file",
"path" : "/testData/minimal_test/reference/transcriptome.fasta"
"path" : "test_data"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/kallisto/index/main.nf",
"modules/nf-core/kallisto/index/meta.yml"
],
"last_sha" : "c0816976384d5e7ee6079c29c45958df1ffa0ee4"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
"keywords" : [
"kallisto",
"index"
],
"license" : "BSD 2-Clause License",
"references" : {
"doi" : [
"https://doi.org/10.1038/nbt.3519"
]
},
"links" : {
"repository" : "https://github.com/pachterlab/kallisto",
"homepage" : "https://pachterlab.github.io/kallisto/about",
"documentation" : "https://pachterlab.github.io/kallisto/manual",
"issue_tracker" : "https://github.com/pachterlab/kallisto/issues"
},
"runners" : [
{
"type" : "executable",
@@ -3000,46 +3069,36 @@ meta = [
"config" : "/workdir/root/repo/src/kallisto/kallisto_index/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_index",
"output" : "target/nextflow/kallisto/kallisto_index",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3053,11 +3112,20 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_fasta='&'#" ; else echo "# par_transcriptome_fasta="; fi )
$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_aligner_kmer_size='&'#" ; else echo "# par_pseudo_aligner_kmer_size="; fi )
$( if [ ! -z ${VIASH_PAR_KALLISTO_INDEX+x} ]; then echo "${VIASH_PAR_KALLISTO_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kallisto_index='&'#" ; else echo "# par_kallisto_index="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_D_LIST+x} ]; then echo "${VIASH_PAR_D_LIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_d_list='&'#" ; else echo "# par_d_list="; fi )
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
$( if [ ! -z ${VIASH_PAR_KMER_SIZE+x} ]; then echo "${VIASH_PAR_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_size='&'#" ; else echo "# par_kmer_size="; fi )
$( if [ ! -z ${VIASH_PAR_MAKE_UNIQUE+x} ]; then echo "${VIASH_PAR_MAKE_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_make_unique='&'#" ; else echo "# par_make_unique="; fi )
$( if [ ! -z ${VIASH_PAR_AA+x} ]; then echo "${VIASH_PAR_AA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aa='&'#" ; else echo "# par_aa="; fi )
$( if [ ! -z ${VIASH_PAR_DISTIGUISH+x} ]; then echo "${VIASH_PAR_DISTIGUISH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_distiguish='&'#" ; else echo "# par_distiguish="; fi )
$( if [ ! -z ${VIASH_PAR_MIN_SIZE+x} ]; then echo "${VIASH_PAR_MIN_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_size='&'#" ; else echo "# par_min_size="; fi )
$( if [ ! -z ${VIASH_PAR_EC_MAX_SIZE+x} ]; then echo "${VIASH_PAR_EC_MAX_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ec_max_size='&'#" ; else echo "# par_ec_max_size="; fi )
$( if [ ! -z ${VIASH_PAR_TMP+x} ]; then echo "${VIASH_PAR_TMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tmp='&'#" ; else echo "# par_tmp="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3078,14 +3146,35 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
set -eo pipefail
# The flag is declared as --distiguish (sic), so Viash exports par_distiguish.
# Mirror it into the correctly spelled name that the rest of this script reads;
# without this the --distinguish option is never forwarded to kallisto.
par_distinguish="\\${par_distiguish}"
# Boolean flags that arrive as the string false are unset here, so that the
# conditional parameter expansions in the kallisto call emit nothing for them.
unset_if_false=( par_make_unique par_aa par_distinguish )
for var in "\\${unset_if_false[@]}"; do
temp_var="\\${!var}"
[[ "\\$temp_var" == "false" ]] && unset \\$var
done
if [ -n "\\$par_kmer_size" ]; then
if [[ "\\$par_kmer_size" -lt 1 || "\\$par_kmer_size" -gt 31 || \\$(( par_kmer_size % 2 )) -eq 0 ]]; then
echo "Error: Kmer size must be an odd number between 1 and 31."
exit 1
fi
fi
kallisto index \\\\
\\${par_pseudo_aligner_kmer_size:+-k \\$par_pseudo_aligner_kmer_size} \\\\
-i \\$par_kallisto_index \\\\
\\$par_transcriptome_fasta
-i "\\${par_index}" \\\\
\\${par_kmer_size:+--kmer-size "\\${par_kmer_size}"} \\\\
\\${par_make_unique:+--make-unique} \\\\
\\${par_aa:+--aa} \\\\
\\${par_distinguish:+--distinguish} \\\\
\\${par_min_size:+--min-size "\\${par_min_size}"} \\\\
\\${par_ec_max_size:+--ec-max-size "\\${par_ec_max_size}"} \\\\
\\${par_d_list:+--d-list "\\${par_d_list}"} \\\\
\\${meta_cpus:+--threads "\\${meta_cpus}"} \\\\
\\${par_tmp:+--tmp "\\${par_tmp}"} \\\\
"\\${par_input}"
VIASHMAIN
bash "$tempscript"
'''
@@ -3446,7 +3535,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/kallisto/kallisto_index",
"image" : "vsh/biobox/kallisto/kallisto_index",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -3,7 +3,7 @@ manifest {
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Create Kallisto index.\n'
description = 'Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n'
}
process.container = 'nextflow/bash:latest'

View File

@@ -1,7 +1,7 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "bbmap_bbsplit",
"description": "Split sequencing reads by mapping them to multiple references simultaneously.\n",
"title": "kallisto_index",
"description": "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n",
"type": "object",
"definitions": {
@@ -14,72 +14,21 @@
"properties": {
"id": {
"type":
"string",
"description": "Type: `string`. Sample ID",
"help_text": "Type: `string`. Sample ID"
}
,
"paired": {
"type":
"boolean",
"description": "Type: `boolean`, default: `false`. Paired fastq files or not?",
"help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?"
,
"default":false
}
,
"input": {
"type":
"string",
"description": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)",
"help_text": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)"
"description": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (",
"help_text": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n"
}
,
"primary_ref": {
"d_list": {
"type":
"string",
"description": "Type: `file`. Primary reference FASTA",
"help_text": "Type: `file`. Primary reference FASTA"
}
,
"bbsplit_fasta_list": {
"type":
"string",
"description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit",
"help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit."
}
,
"only_build_index": {
"type":
"boolean",
"description": "Type: `boolean`. true = only build index; false = mapping",
"help_text": "Type: `boolean`. true = only build index; false = mapping"
}
,
"built_bbsplit_index": {
"type":
"string",
"description": "Type: `file`. Directory with index files",
"help_text": "Type: `file`. Directory with index files"
"description": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification",
"help_text": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification.\n"
}
@@ -95,35 +44,96 @@
"properties": {
"fastq_1": {
"index": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1",
"help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1."
"description": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. ",
"help_text": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. "
,
"default":"$id.$key.fastq_1.fastq"
"default": "$id.$key.index.index"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"kmer_size": {
"type":
"integer",
"description": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027)",
"help_text": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027).\n"
}
,
"fastq_2": {
"make_unique": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2",
"help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2."
"boolean",
"description": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names",
"help_text": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names.\n"
,
"default":"$id.$key.fastq_2.fastq"
"default": "False"
}
,
"bbsplit_index": {
"aa": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences",
"help_text": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences.\n"
,
"default": "False"
}
,
"distiguish": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names",
"help_text": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names.\n"
,
"default": "False"
}
,
"min_size": {
"type":
"integer",
"description": "Type: `integer`. Length of minimizers (default: automatically chosen)",
"help_text": "Type: `integer`. Length of minimizers (default: automatically chosen).\n"
}
,
"ec_max_size": {
"type":
"integer",
"description": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum)",
"help_text": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum).\n"
}
,
"tmp": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files",
"help_text": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files"
,
"default":"$id.$key.bbsplit_index.bbsplit_index"
"description": "Type: `string`, example: `tmp`. Path to a directory for temporary files",
"help_text": "Type: `string`, example: `tmp`. Path to a directory for temporary files.\n"
}
@@ -171,6 +181,10 @@
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}

View File

@@ -11,84 +11,33 @@ argument_groups:
info: null
must_exist: true
create_parent: true
required: false
required: true
direction: "input"
multiple: true
multiple_sep: ","
- type: "boolean"
name: "--paired"
description: "Paired reads or not."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--index"
alternatives:
- "-i"
description: "Kallisto genome index."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "Optional gtf file for translation of transcripts into genomic coordinates."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chromosomes"
description: "Optional tab separated file with chromosome names and lengths."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length"
description: "For single-end mode only, the estimated average fragment length."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_sd"
description: "For single-end mode only, the estimated standard deviation of the\
\ fragment length."
info: null
required: false
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--output"
description: "Kallisto quant results"
name: "--output_dir"
alternatives:
- "-o"
description: "Directory to write output to."
info: null
default:
- "$id.kallisto_quant_results"
must_exist: true
create_parent: true
required: false
required: true
direction: "output"
multiple: false
multiple_sep: ";"
@@ -96,73 +45,114 @@ argument_groups:
name: "--log"
description: "File containing log information from running kallisto quant"
info: null
default:
- "$id.kallisto_quant.log.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--run_info"
description: "A json file containing information about the run"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--single"
description: "Single end mode."
info: null
direction: "input"
- type: "boolean_true"
name: "--single_overhang"
description: "Include reads where unobserved rest of fragment is predicted to\
\ lie outside a transcript."
info: null
direction: "input"
- type: "boolean_true"
name: "--fr_stranded"
description: "Strand specific reads, first read forward."
info: null
direction: "input"
- type: "boolean_true"
name: "--rf_stranded"
description: "Strand specific reads, first read reverse."
info: null
direction: "input"
- type: "double"
name: "--fragment_length"
alternatives:
- "-l"
description: "The estimated average fragment length."
info: null
default:
- "$id.run_info.json"
must_exist: true
create_parent: true
required: false
direction: "output"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--quant_results_file"
description: "TSV file containing abundance estimates from Kallisto"
- type: "double"
name: "--sd"
alternatives:
- "-s"
description: "The estimated standard deviation of the fragment length (default:\
\ -l, -s values are estimated \nfrom paired end data, but are required when\
\ using --single).\n"
info: null
default:
- "$id.abundance.tsv"
must_exist: true
create_parent: true
required: false
direction: "output"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--plaintext"
description: "Output plaintext instead of HDF5."
info: null
direction: "input"
- type: "integer"
name: "--bootstrap_samples"
alternatives:
- "-b"
description: "Number of bootstrap samples to draw. Default: '0'\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Random seed for bootstrap. Default: '42'\n"
info: null
example:
- 42
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Computes equivalence classes for reads and quantifies abundances.\n"
description: "Quantifying abundances of transcripts from RNA-Seq data, or more generally\
\ of target sequences using high-throughput sequencing reads.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "transcriptome.fasta"
- type: "file"
path: "SRR6357070_1.fastq.gz"
- type: "file"
path: "SRR6357070_2.fastq.gz"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/kallisto/quant/main.nf"
- "modules/nf-core/kallisto/quant/meta.yml"
last_sha: "aff1d2e02717247831644769fc3ba84868c3fdde"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
keywords:
- "kallisto"
- "quant"
- "pseudoalignment"
license: "BSD 2-Clause License"
references:
doi:
- "10.1038/nbt.3519"
links:
repository: "https://github.com/pachterlab/kallisto"
homepage: "https://pachterlab.github.io/kallisto/about"
documentation: "https://pachterlab.github.io/kallisto/manual"
issue_tracker: "https://github.com/pachterlab/kallisto/issues"
runners:
- type: "executable"
id: "executable"
@@ -242,6 +232,9 @@ engines:
\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\
\ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\
\ /usr/local/bin/\n"
- type: "docker"
run:
- "echo \"kallisto: $(kallisto version | sed 's/kallisto, version //')\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
@@ -253,31 +246,28 @@ build_info:
output: "target/nextflow/kallisto/kallisto_quant"
executable: "target/nextflow/kallisto/kallisto_quant/main.nf"
viash_version: "0.9.0"
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "rnaseq"
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -2817,76 +2817,21 @@ meta = [
"description" : "List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"required" : true,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ","
},
{
"type" : "boolean",
"name" : "--paired",
"description" : "Paired reads or not.",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--strandedness",
"description" : "Sample strand-specificity.",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--index",
"alternatives" : [
"-i"
],
"description" : "Kallisto genome index.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--gtf",
"description" : "Optional gtf file for translation of transcripts into genomic coordinates.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--chromosomes",
"description" : "Optional tab separated file with chromosome names and lengths.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--fragment_length",
"description" : "For single-end mode only, the estimated average fragment length.",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--fragment_length_sd",
"description" : "For single-end mode only, the estimated standard deviation of the fragment length.",
"required" : false,
"required" : true,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
@@ -2898,14 +2843,14 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--output",
"description" : "Kallisto quant results",
"default" : [
"$id.kallisto_quant_results"
"name" : "--output_dir",
"alternatives" : [
"-o"
],
"description" : "Directory to write output to.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
@@ -2914,41 +2859,96 @@ meta = [
"type" : "file",
"name" : "--log",
"description" : "File containing log information from running kallisto quant",
"default" : [
"$id.kallisto_quant.log.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"type" : "file",
"name" : "--run_info",
"description" : "A json file containing information about the run",
"default" : [
"$id.run_info.json"
"name" : "Options",
"arguments" : [
{
"type" : "boolean_true",
"name" : "--single",
"description" : "Single end mode.",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--single_overhang",
"description" : "Include reads where unobserved rest of fragment is predicted to lie outside a transcript.",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--fr_stranded",
"description" : "Strand specific reads, first read forward.",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--rf_stranded",
"description" : "Strand specific reads, first read reverse.",
"direction" : "input"
},
{
"type" : "double",
"name" : "--fragment_length",
"alternatives" : [
"-l"
],
"must_exist" : true,
"create_parent" : true,
"description" : "The estimated average fragment length.",
"required" : false,
"direction" : "output",
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--quant_results_file",
"description" : "TSV file containing abundance estimates from Kallisto",
"default" : [
"$id.abundance.tsv"
"type" : "double",
"name" : "--sd",
"alternatives" : [
"-s"
],
"must_exist" : true,
"create_parent" : true,
"description" : "The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n",
"required" : false,
"direction" : "output",
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--plaintext",
"description" : "Output plaintext instead of HDF5.",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--bootstrap_samples",
"alternatives" : [
"-b"
],
"description" : "Number of bootstrap samples to draw. Default: '0'\n",
"example" : [
0
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Random seed for bootstrap. Default: '42'\n",
"example" : [
42
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
@@ -2962,7 +2962,7 @@ meta = [
"is_executable" : true
}
],
"description" : "Computes equivalence classes for reads and quantifies abundances.\n",
"description" : "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n",
"test_resources" : [
{
"type" : "bash_script",
@@ -2971,47 +2971,32 @@ meta = [
},
{
"type" : "file",
"path" : "/testData/minimal_test/reference/transcriptome.fasta"
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
},
{
"type" : "file",
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
"path" : "test_data"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/kallisto/quant/main.nf",
"modules/nf-core/kallisto/quant/meta.yml"
],
"last_sha" : "aff1d2e02717247831644769fc3ba84868c3fdde"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
"keywords" : [
"kallisto",
"quant",
"pseudoalignment"
],
"license" : "BSD 2-Clause License",
"references" : {
"doi" : [
"10.1038/nbt.3519"
]
},
"links" : {
"repository" : "https://github.com/pachterlab/kallisto",
"homepage" : "https://pachterlab.github.io/kallisto/about",
"documentation" : "https://pachterlab.github.io/kallisto/manual",
"issue_tracker" : "https://github.com/pachterlab/kallisto/issues"
},
"runners" : [
{
"type" : "executable",
@@ -3100,6 +3085,12 @@ meta = [
"run" : [
"apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n"
]
},
{
"type" : "docker",
"run" : [
"echo \\"kallisto: $(kallisto version | sed 's/kallisto, version //')\\" > /var/software_versions.txt\n"
]
}
]
},
@@ -3112,46 +3103,36 @@ meta = [
"config" : "/workdir/root/repo/src/kallisto/kallisto_quant/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_quant",
"output" : "target/nextflow/kallisto/kallisto_quant",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3165,20 +3146,23 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi )
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi )
$( if [ ! -z ${VIASH_PAR_CHROMOSOMES+x} ]; then echo "${VIASH_PAR_CHROMOSOMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chromosomes='&'#" ; else echo "# par_chromosomes="; fi )
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi )
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_sd='&'#" ; else echo "# par_fragment_length_sd="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi )
$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi )
$( if [ ! -z ${VIASH_PAR_RUN_INFO+x} ]; then echo "${VIASH_PAR_RUN_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_info='&'#" ; else echo "# par_run_info="; fi )
$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results_file='&'#" ; else echo "# par_quant_results_file="; fi )
$( if [ ! -z ${VIASH_PAR_SINGLE+x} ]; then echo "${VIASH_PAR_SINGLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single='&'#" ; else echo "# par_single="; fi )
$( if [ ! -z ${VIASH_PAR_SINGLE_OVERHANG+x} ]; then echo "${VIASH_PAR_SINGLE_OVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_overhang='&'#" ; else echo "# par_single_overhang="; fi )
$( if [ ! -z ${VIASH_PAR_FR_STRANDED+x} ]; then echo "${VIASH_PAR_FR_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fr_stranded='&'#" ; else echo "# par_fr_stranded="; fi )
$( if [ ! -z ${VIASH_PAR_RF_STRANDED+x} ]; then echo "${VIASH_PAR_RF_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rf_stranded='&'#" ; else echo "# par_rf_stranded="; fi )
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi )
$( if [ ! -z ${VIASH_PAR_SD+x} ]; then echo "${VIASH_PAR_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sd='&'#" ; else echo "# par_sd="; fi )
$( if [ ! -z ${VIASH_PAR_PLAINTEXT+x} ]; then echo "${VIASH_PAR_PLAINTEXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_plaintext='&'#" ; else echo "# par_plaintext="; fi )
$( if [ ! -z ${VIASH_PAR_BOOTSTRAP_SAMPLES+x} ]; then echo "${VIASH_PAR_BOOTSTRAP_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bootstrap_samples='&'#" ; else echo "# par_bootstrap_samples="; fi )
$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3199,46 +3183,46 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
set -eo pipefail
IFS="," read -ra input <<< \\$par_input
unset_if_false=( par_single par_single_overhang par_rf_stranded par_fr_stranded par_plaintext )
single_end_params=''
if [ \\$par_paired == "false" ]; then
if [[ \\$par_fragment_length < 0 ]] || [[ ! \\$fragment_length_sd < 0 ]]; then
echo "fragment_length and fragment_length_sd must be set for single-end data"
for var in "\\${unset_if_false[@]}"; do
temp_var="\\${!var}"
[[ "\\$temp_var" == "false" ]] && unset \\$var
done
IFS=";" read -ra input <<< \\$par_input
# Check if par_single is not set and ensure even number of input files
if [ -z "\\$par_single" ]; then
if [ \\$((\\${#input[@]} % 2)) -ne 0 ]; then
echo "Error: When running in paired-end mode, the number of input files must be even."
echo "Number of input files provided: \\${#input[@]}"
exit 1
fi
single_end_params="--single --fragment-length \\$par_fragment_length --sd \\$par_fragment_length_sd"
fi
strandedness=''
if [[ "\\$par_extra_args" != *"--fr-stranded"* ]] && [[ "\\$par_extra_args" != *"--rf-stranded"* ]]; then
if [ "\\$par_strandedness" == 'forward' ]; then
strandedness='--fr-stranded'
elif [ "\\$par_strandedness" == 'reverse' ]; then
strandedness='--rf-stranded'
fi
fi
mkdir -p \\$par_output
mkdir -p \\$par_output_dir
kallisto quant \\\\
\\${meta_cpus:+--threads \\$meta_cpus} \\\\
--index \\$par_index \\\\
\\${par_gtf:+--gtf \\$par_gtf} \\\\
\\${par_chromosomes:+--chromosomes \\$par_chromosomes} \\\\
\\$single_end_params \\\\
\\$strandedness \\\\
\\$par_extra_args \\\\
-o \\$par_output \\\\
\\${input[*]} 2> >(tee -a \\${par_output}/kallisto_quant.log >&2)
mv \\${par_output}/kallisto_quant.log \\${par_log}
mv \\${par_output}/run_info.json \\${par_run_info}
cp \\${par_output}/abundance.tsv \\${par_quant_results_file}
-i \\$par_index \\\\
\\${par_gtf:+--gtf "\\${par_gtf}"} \\\\
\\${par_single:+--single} \\\\
\\${par_single_overhang:+--single-overhang} \\\\
\\${par_fr_stranded:+--fr-stranded} \\\\
\\${par_rf_stranded:+--rf-stranded} \\\\
\\${par_plaintext:+--plaintext} \\\\
\\${par_bootstrap_samples:+--bootstrap-samples "\\${par_bootstrap_samples}"} \\\\
\\${par_fragment_length:+--fragment-length "\\${par_fragment_length}"} \\\\
\\${par_sd:+--sd "\\${par_sd}"} \\\\
\\${par_seed:+--seed "\\${par_seed}"} \\\\
-o \\$par_output_dir \\\\
\\${input[*]} 2> >(tee -a \\$par_log >&2)
VIASHMAIN
bash "$tempscript"
'''
@@ -3599,7 +3583,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/kallisto/kallisto_quant",
"image" : "vsh/biobox/kallisto/kallisto_quant",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the kallisto/kallisto_quant module.
manifest {
name = 'kallisto/kallisto_quant'
// Entry-point script of this module.
mainScript = 'main.nf'
// Minimum Nextflow version; per the Nextflow docs, the leading '!' makes a
// version mismatch abort the run instead of only printing a warning.
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n'
}
// Container image set at the `process` scope, i.e. the default for processes
// that do not specify their own container.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set to a
// non-empty value (Groovy's ?: skips both null and empty strings), falling
// back to '/tmp'; the chosen path is then made absolute.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles, selectable at launch with `-profile <name>`.
profiles {
// Turns off publishDir for every process (the '.*' selector matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Points the temp setting of docker, podman and charliecloud at the
// tempDir value computed at the top of this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each container-engine profile below enables exactly one engine and
// explicitly disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Singularity additionally turns on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Maps process labels to resource requests: a process carrying one of these
// labels receives the corresponding memory or cpus setting.
process{
// Decimal memory labels: gb/tb suffixes are powers of ten (1gb = 10^9 bytes).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels: gib/tib suffixes are powers of two (1gib = 2^30 bytes).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels: the number in the label is the cpus value assigned.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,225 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "kallisto_quant",
"description": "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n",
"type": "object",
"definitions": {
"input" : {
"title": "Input",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively."
}
,
"index": {
"type":
"string",
"description": "Type: `file`, required. Kallisto genome index",
"help_text": "Type: `file`, required. Kallisto genome index."
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"output_dir": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to",
"help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to."
,
"default": "$id.$key.output_dir.output_dir"
}
,
"log": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant",
"help_text": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant"
,
"default": "$id.$key.log.log"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"single": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Single end mode",
"help_text": "Type: `boolean_true`, default: `false`. Single end mode."
,
"default": "False"
}
,
"single_overhang": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript",
"help_text": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript."
,
"default": "False"
}
,
"fr_stranded": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward",
"help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward."
,
"default": "False"
}
,
"rf_stranded": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse",
"help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse."
,
"default": "False"
}
,
"fragment_length": {
"type":
"number",
"description": "Type: `double`. The estimated average fragment length",
"help_text": "Type: `double`. The estimated average fragment length."
}
,
"sd": {
"type":
"number",
"description": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single)",
"help_text": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n"
}
,
"plaintext": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5",
"help_text": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5."
,
"default": "False"
}
,
"bootstrap_samples": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Number of bootstrap samples to draw",
"help_text": "Type: `integer`, example: `0`. Number of bootstrap samples to draw. Default: \u00270\u0027\n"
}
,
"seed": {
"type":
"integer",
"description": "Type: `integer`, example: `42`. Random seed for bootstrap",
"help_text": "Type: `integer`, example: `42`. Random seed for bootstrap. Default: \u002742\u0027\n"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -1,12 +1,30 @@
name: "qualimap"
name: "qualimap_rnaseq"
namespace: "qualimap"
version: "main"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "path to input mapping file in BAM format."
name: "--bam"
description: "Path to the sequence alignment file in BAM format, produced by a\
\ splicing-aware aligner."
info: null
example:
- "alignment.bam"
must_exist: true
create_parent: true
required: true
@@ -15,8 +33,10 @@ argument_groups:
multiple_sep: ";"
- type: "file"
name: "--gtf"
description: "path to annotations file in Ensembl GTF format."
description: "Path to genomic annotations in Ensembl GTF format."
info: null
example:
- "annotations.gtf"
must_exist: true
create_parent: true
required: true
@@ -26,11 +46,21 @@ argument_groups:
- name: "Output"
arguments:
- type: "file"
name: "--output_dir"
description: "path to output directory for raw data and report."
name: "--qc_results"
description: "Text file containing the RNAseq QC results."
info: null
example:
- "rnaseq_qc_results.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts"
description: "Output file for computed counts."
info: null
default:
- "$id.qualimap_output"
must_exist: true
create_parent: true
required: false
@@ -38,48 +68,34 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_pdf"
description: "path to output file for pdf report."
name: "--report"
description: "Report output file. Supported formats are PDF or HTML."
info: null
default:
- "$id.report.pdf"
must_exist: false
example:
- "report.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_format"
description: "Format of the output report (PDF or HTML, default is HTML)"
info: null
default:
- "html"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Optional"
arguments:
- type: "integer"
name: "--pr_bases"
name: "--num_pr_bases"
description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\
\ bias (default = 100)."
info: null
default:
- 100
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--tr_bias"
name: "--num_tr_bias"
description: "Number of top highly expressed transcripts to compute 5'-3' bias\
\ (default = 1000)."
info: null
default:
- 1000
required: false
min: 1
direction: "input"
@@ -89,9 +105,10 @@ argument_groups:
name: "--algorithm"
description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)."
info: null
default:
- "uniquely-mapped-reads"
required: false
choices:
- "uniquely-mapped-reads"
- "proportional"
direction: "input"
multiple: false
multiple_sep: ";"
@@ -100,8 +117,6 @@ argument_groups:
description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\
\ or non-strand-specific (default))."
info: null
default:
- "non-strand-specific"
required: false
choices:
- "non-strand-specific"
@@ -127,8 +142,6 @@ argument_groups:
name: "--java_memory_size"
description: "maximum Java heap memory size, default = 4G."
info: null
default:
- "4G"
required: false
direction: "input"
multiple: false
@@ -137,36 +150,33 @@ resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "RNA-seq QC analysis using the qualimap \n"
description: "Qualimap RNA-seq QC reports quality control metrics and bias estimations\
\ \nwhich are specific for whole transcriptome sequencing, including reads genomic\
\ \norigin, junction analysis, transcript coverage and 5-3 bias computation.\n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
- type: "file"
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai"
- type: "file"
path: "genes.gtf"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/qualimap/rnaseq/main.nf"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
keywords:
- "RNA-seq"
- "quality control"
- "QC Report"
license: "GPL-2.0"
references:
doi:
- "10.1093/bioinformatics/btv566"
links:
repository: "https://bitbucket.org/kokonech/qualimap/commits/branch/master"
homepage: "http://qualimap.conesalab.org/"
documentation: "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc"
issue_tracker: "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open"
runners:
- type: "executable"
id: "executable"
@@ -235,67 +245,47 @@ runners:
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
image: "quay.io/biocontainers/qualimap:2.3--hdfd78af_0"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "r-base"
- "unzip"
- "wget"
- "openjdk-8-jdk"
- "libxml2-dev"
- "libcurl4-openssl-dev"
interactive: false
- type: "docker"
run:
- "wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip &&\
\ \\\nunzip qualimap_v2.3.zip && \\\ncp -a qualimap_v2.3/. usr/bin && \\\nunset\
\ DISPLAY && \\\nmkdir -p tmp && \\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n"
- type: "r"
cran:
- "optparse"
bioc:
- "NOISeqr"
bioc_force_install: false
- "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/qualimap/config.vsh.yaml"
config: "src/qualimap/qualimap_rnaseq/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/qualimap"
executable: "target/nextflow/qualimap/main.nf"
output: "target/nextflow/qualimap/qualimap_rnaseq"
executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf"
viash_version: "0.9.0"
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
# Credentials stripped: access tokens must never be persisted in build metadata.
git_remote: "https://github.com/viash-hub/rnaseq"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
# Credentials stripped: access tokens must never be persisted in build metadata.
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "rnaseq"
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -1,4 +1,4 @@
// qualimap main
// qualimap_rnaseq main
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -8,6 +8,9 @@
// authors of this component should specify the license in the header of such
// files, or include a separate license file detailing the licenses of all included
// files.
//
// Component authors:
// * Dorien Roosen (author, maintainer)
////////////////////////////
// VDSL3 helper functions //
@@ -2804,16 +2807,43 @@ nextflow.enable.dsl=2
meta = [
"resources_dir": moduleDir.toRealPath().normalize(),
"config": processConfig(readJsonBlob('''{
"name" : "qualimap",
"name" : "qualimap_rnaseq",
"namespace" : "qualimap",
"version" : "main",
"authors" : [
{
"name" : "Dorien Roosen",
"roles" : [
"author",
"maintainer"
],
"info" : {
"links" : {
"email" : "dorien@data-intuitive.com",
"github" : "dorien-er",
"linkedin" : "dorien-roosen"
},
"organizations" : [
{
"name" : "Data Intuitive",
"href" : "https://www.data-intuitive.com",
"role" : "Data Scientist"
}
]
}
}
],
"argument_groups" : [
{
"name" : "Input",
"arguments" : [
{
"type" : "file",
"name" : "--input",
"description" : "path to input mapping file in BAM format.",
"name" : "--bam",
"description" : "Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner.",
"example" : [
"alignment.bam"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -2824,7 +2854,10 @@ meta = [
{
"type" : "file",
"name" : "--gtf",
"description" : "path to annotations file in Ensembl GTF format.",
"description" : "Path to genomic annotations in Ensembl GTF format.",
"example" : [
"annotations.gtf"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -2839,13 +2872,24 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--output_dir",
"description" : "path to output directory for raw data and report.",
"default" : [
"$id.qualimap_output"
"name" : "--qc_results",
"description" : "Text file containing the RNAseq QC results.",
"example" : [
"rnaseq_qc_results.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--counts",
"description" : "Output file for computed counts.",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
@@ -2853,29 +2897,17 @@ meta = [
},
{
"type" : "file",
"name" : "--output_pdf",
"description" : "path to output file for pdf report.",
"default" : [
"$id.report.pdf"
"name" : "--report",
"description" : "Report output file. Supported formats are PDF or HTML.",
"example" : [
"report.html"
],
"must_exist" : false,
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--output_format",
"description" : "Format of the output report (PDF or HTML, default is HTML)",
"default" : [
"html"
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
@@ -2884,11 +2916,8 @@ meta = [
"arguments" : [
{
"type" : "integer",
"name" : "--pr_bases",
"name" : "--num_pr_bases",
"description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).",
"default" : [
100
],
"required" : false,
"min" : 1,
"direction" : "input",
@@ -2897,11 +2926,8 @@ meta = [
},
{
"type" : "integer",
"name" : "--tr_bias",
"name" : "--num_tr_bias",
"description" : "Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).",
"default" : [
1000
],
"required" : false,
"min" : 1,
"direction" : "input",
@@ -2912,10 +2938,11 @@ meta = [
"type" : "string",
"name" : "--algorithm",
"description" : "Counting algorithm (uniquely-mapped-reads (default) or proportional).",
"default" : [
"uniquely-mapped-reads"
],
"required" : false,
"choices" : [
"uniquely-mapped-reads",
"proportional"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
@@ -2924,9 +2951,6 @@ meta = [
"type" : "string",
"name" : "--sequencing_protocol",
"description" : "Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
"default" : [
"non-strand-specific"
],
"required" : false,
"choices" : [
"non-strand-specific",
@@ -2953,9 +2977,6 @@ meta = [
"type" : "string",
"name" : "--java_memory_size",
"description" : "maximum Java heap memory size, default = 4G.",
"default" : [
"4G"
],
"required" : false,
"direction" : "input",
"multiple" : false,
@@ -2971,7 +2992,7 @@ meta = [
"is_executable" : true
}
],
"description" : "RNA-seq QC analysis using the qualimap \n",
"description" : "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5-3 bias computation.\n",
"test_resources" : [
{
"type" : "bash_script",
@@ -2980,46 +3001,32 @@ meta = [
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai"
},
{
"type" : "file",
"path" : "/testData/unit_test_resources/genes.gtf"
"path" : "test_data/"
}
],
"info" : {
"migration_info" : {
"git_repo" : "https://github.com/nf-core/rnaseq.git",
"paths" : [
"modules/nf-core/qualimap/rnaseq/main.nf"
],
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
}
},
"status" : "enabled",
"requirements" : {
"commands" : [
"ps"
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
"keywords" : [
"RNA-seq",
"quality control",
"QC Report"
],
"license" : "GPL-2.0",
"references" : {
"doi" : [
"10.1093/bioinformatics/btv566"
]
},
"links" : {
"repository" : "https://bitbucket.org/kokonech/qualimap/commits/branch/master",
"homepage" : "http://qualimap.conesalab.org/",
"documentation" : "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc",
"issue_tracker" : "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open"
},
"runners" : [
{
"type" : "executable",
@@ -3098,38 +3105,16 @@ meta = [
{
"type" : "docker",
"id" : "docker",
"image" : "ubuntu:22.04",
"image" : "quay.io/biocontainers/qualimap:2.3--hdfd78af_0",
"target_registry" : "images.viash-hub.com",
"target_tag" : "main",
"namespace_separator" : "/",
"setup" : [
{
"type" : "apt",
"packages" : [
"r-base",
"unzip",
"wget",
"openjdk-8-jdk",
"libxml2-dev",
"libcurl4-openssl-dev"
],
"interactive" : false
},
{
"type" : "docker",
"run" : [
"wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \\\\\nunzip qualimap_v2.3.zip && \\\\\ncp -a qualimap_v2.3/. usr/bin && \\\\\nunset DISPLAY && \\\\\nmkdir -p tmp && \\\\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n"
"echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n"
]
},
{
"type" : "r",
"cran" : [
"optparse"
],
"bioc" : [
"NOISeqr"
],
"bioc_force_install" : false
}
]
},
@@ -3139,49 +3124,39 @@ meta = [
}
],
"build_info" : {
"config" : "/workdir/root/repo/src/qualimap/config.vsh.yaml",
"config" : "/workdir/root/repo/src/qualimap/qualimap_rnaseq/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/qualimap",
"output" : "target/nextflow/qualimap/qualimap_rnaseq",
"viash_version" : "0.9.0",
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
"git_tag" : "v0.2.0-26-ga13b57d"
},
"package_config" : {
"name" : "rnaseq",
"version" : "main",
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-test-data/rnaseq/v1",
"dest" : "testData"
}
]
},
"repositories" : [
{
"type" : "vsh",
"name" : "biobox",
"repo" : "vsh/biobox",
"tag" : "main"
},
{
"type" : "vsh",
"name" : "craftbox",
"repo" : "craftbox",
"tag" : "v0.1.0"
}
],
"version" : "main",
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
"viash_version" : "0.9.0",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"source" : "src",
"target" : "target",
"config_mods" : [
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
".requirements.commands := ['ps']\n",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'main'"
],
"organization" : "vsh"
"keywords" : [
"bioinformatics",
"modules",
"sequencing"
],
"license" : "MIT",
"organization" : "vsh",
"links" : {
"repository" : "https://github.com/viash-hub/biobox",
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
}
}
}'''))
]
@@ -3197,13 +3172,13 @@ tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi )
$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_PDF+x} ]; then echo "${VIASH_PAR_OUTPUT_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_pdf='&'#" ; else echo "# par_output_pdf="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi )
$( if [ ! -z ${VIASH_PAR_PR_BASES+x} ]; then echo "${VIASH_PAR_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pr_bases='&'#" ; else echo "# par_pr_bases="; fi )
$( if [ ! -z ${VIASH_PAR_TR_BIAS+x} ]; then echo "${VIASH_PAR_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tr_bias='&'#" ; else echo "# par_tr_bias="; fi )
$( if [ ! -z ${VIASH_PAR_QC_RESULTS+x} ]; then echo "${VIASH_PAR_QC_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qc_results='&'#" ; else echo "# par_qc_results="; fi )
$( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo "${VIASH_PAR_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts='&'#" ; else echo "# par_counts="; fi )
$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi )
$( if [ ! -z ${VIASH_PAR_NUM_PR_BASES+x} ]; then echo "${VIASH_PAR_NUM_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_pr_bases='&'#" ; else echo "# par_num_pr_bases="; fi )
$( if [ ! -z ${VIASH_PAR_NUM_TR_BIAS+x} ]; then echo "${VIASH_PAR_NUM_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_tr_bias='&'#" ; else echo "# par_num_tr_bias="; fi )
$( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi )
$( if [ ! -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi )
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
@@ -3233,20 +3208,52 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
set -eo pipefail
mkdir -p \\$par_output_dir
tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" qualimap_XXXXXXXXX)
# Handle output parameters
if [ -n "\\$par_report" ]; then
outfile=\\$(basename "\\$par_report")
report_extension="\\${outfile##*.}"
fi
if [ -n "\\$par_counts" ]; then
counts=\\$(basename "\\$par_counts")
fi
# disable flags
[[ "\\$par_paired" == "false" ]] && unset par_paired
[[ "\\$par_sorted" == "false" ]] && unset par_sorted
# Run qualimap
qualimap rnaseq \\\\
--java-mem-size=\\$par_java_memory_size \\\\
--algorithm \\$par_algorithm \\\\
--num-pr-bases \\$par_pr_bases \\\\
--num-tr-bias \\$par_tr_bias \\\\
--sequencing-protocol \\$par_sequencing_protocol \\\\
-bam \\$par_input \\\\
\\${meta_memory_mb:+--java-mem-size=\\${meta_memory_mb}M} \\\\
\\${par_algorithm:+--algorithm \\$par_algorithm} \\\\
\\${par_sequencing_protocol:+--sequencing-protocol \\$par_sequencing_protocol} \\\\
-bam \\$par_bam \\\\
-gtf \\$par_gtf \\\\
\\${par_paired:+-pe} \\\\
\\${par_sorted:+-s} \\\\
-outdir \\$par_output_dir \\\\
-outformat \\$par_output_format
-outdir "\\$tmp_dir" \\\\
\\${par_num_pr_bases:+--num-pr-bases \\$par_num_pr_bases} \\\\
\\${par_num_tr_bias:+--num-tr-bias \\$par_num_tr_bias} \\\\
\\${par_report:+-outformat \\$report_extension} \\\\
\\${par_paired:+--paired} \\\\
\\${par_sorted:+--sorted} \\\\
\\${par_report:+-outfile "\\$outfile"} \\\\
\\${par_counts:+-oc "\\$counts"}
# Move output files
mv "\\$tmp_dir/rnaseq_qc_results.txt" "\\$par_qc_results"
if [ -n "\\$par_report" ] && [ \\$report_extension = "html" ]; then
mv "\\$tmp_dir/qualimapReport.html" "\\$par_report"
fi
if [ -n "\\$par_report" ] && [ \\$report_extension = "pdf" ]; then
mv "\\$tmp_dir/\\$outfile" "\\$par_report"
fi
if [ -n "\\$par_counts" ]; then
mv "\\$tmp_dir/\\$counts" "\\$par_counts"
fi
VIASHMAIN
bash "$tempscript"
'''
@@ -3607,7 +3614,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/rnaseq/qualimap",
"image" : "vsh/biobox/qualimap/qualimap_rnaseq",
"tag" : "main"
},
"tag" : "$id"

View File

@@ -1,9 +1,10 @@
manifest {
name = 'fastqc'
name = 'qualimap/qualimap_rnaseq'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n'
description = 'Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5-3 bias computation.\n'
author = 'Dorien Roosen'
}
process.container = 'nextflow/bash:latest'

View File

@@ -1,7 +1,7 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "qualimap",
"description": "RNA-seq QC analysis using the qualimap \n",
"title": "qualimap_rnaseq",
"description": "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5\u2019-3\u2019 bias computation.\n",
"type": "object",
"definitions": {
@@ -14,11 +14,11 @@
"properties": {
"input": {
"bam": {
"type":
"string",
"description": "Type: `file`, required. path to input mapping file in BAM format",
"help_text": "Type: `file`, required. path to input mapping file in BAM format."
"description": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner",
"help_text": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner."
}
@@ -27,8 +27,8 @@
"gtf": {
"type":
"string",
"description": "Type: `file`, required. path to annotations file in Ensembl GTF format",
"help_text": "Type: `file`, required. path to annotations file in Ensembl GTF format."
"description": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format",
"help_text": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format."
}
@@ -44,35 +44,35 @@
"properties": {
"output_dir": {
"qc_results": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report",
"help_text": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report."
"description": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results",
"help_text": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results."
,
"default":"$id.$key.output_dir.qualimap_output"
"default": "$id.$key.qc_results.txt"
}
,
"output_pdf": {
"counts": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report",
"help_text": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report."
"description": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts",
"help_text": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts."
,
"default":"$id.$key.output_pdf.pdf"
"default": "$id.$key.counts.counts"
}
,
"output_format": {
"report": {
"type":
"string",
"description": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)",
"help_text": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)"
"description": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file",
"help_text": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file. Supported formats are PDF or HTML."
,
"default":"html"
"default": "$id.$key.report.html"
}
@@ -87,24 +87,22 @@
"properties": {
"pr_bases": {
"num_pr_bases": {
"type":
"integer",
"description": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)",
"help_text": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)."
,
"default":100
"description": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)",
"help_text": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)."
}
,
"tr_bias": {
"num_tr_bias": {
"type":
"integer",
"description": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)",
"help_text": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)."
,
"default":1000
"description": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)",
"help_text": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)."
}
@@ -112,10 +110,11 @@
"algorithm": {
"type":
"string",
"description": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)",
"help_text": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)."
,
"default":"uniquely-mapped-reads"
"description": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional)",
"help_text": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional).",
"enum": ["uniquely-mapped-reads", "proportional"]
}
@@ -123,12 +122,11 @@
"sequencing_protocol": {
"type":
"string",
"description": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))",
"help_text": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
"description": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))",
"help_text": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
"enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"]
,
"default":"non-strand-specific"
}
@@ -139,7 +137,7 @@
"description": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads",
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads."
,
"default":false
"default": "False"
}
@@ -150,7 +148,7 @@
"description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name",
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis."
,
"default":false
"default": "False"
}
@@ -158,10 +156,9 @@
"java_memory_size": {
"type":
"string",
"description": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G",
"help_text": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G."
,
"default":"4G"
"description": "Type: `string`. maximum Java heap memory size, default = 4G",
"help_text": "Type: `string`. maximum Java heap memory size, default = 4G."
}

View File

@@ -0,0 +1,879 @@
name: "rsem_calculate_expression"
namespace: "rsem"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "Sample ID."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse"
info: null
required: false
choices:
- "forward"
- "reverse"
- "unstranded"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--paired"
description: "Paired-end reads or not?"
info: null
direction: "input"
- type: "file"
name: "--input"
description: "Input reads for quantification."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--index"
description: "RSEM index."
info: null
must_exist: false
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_args"
description: "Extra rsem-calculate-expression arguments in addition to the examples."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--counts_gene"
description: "Expression counts on gene level"
info: null
example:
- "$id.genes.results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_transcripts"
description: "Expression counts on transcript level"
info: null
example:
- "$id.isoforms.results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stat"
description: "RSEM statistics"
info: null
example:
- "$id.stat"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--logs"
description: "RSEM logs"
info: null
example:
- "$id.log"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_star"
description: "BAM file generated by STAR (optional)"
info: null
example:
- "$id.STAR.genome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_genome"
description: "Genome BAM file (optional)"
info: null
example:
- "$id.genome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_transcript"
description: "Transcript BAM file (optional)"
info: null
example:
- "$id.transcript.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--sort_bam_by_read_name"
description: "Sort BAM file aligned under transcript coordinate by read name.\
\ Setting this option on will produce \ndeterministic maximum likelihood estimations\
\ from independent runs. Note that sorting will take long \ntime and lots of\
\ memory.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_bam_output"
description: "Do not output any BAM file."
info: null
direction: "input"
- type: "boolean_true"
name: "--sampling_for_bam"
description: "When RSEM generates a BAM file, instead of outputting all alignments\
\ a read has with their posterior \nprobabilities, one alignment is sampled\
\ according to the posterior probabilities. The sampling procedure \nincludes\
\ the alignment to the \"noise\" transcript, which does not appear in the BAM\
\ file. Only the \nsampled alignment has a weight of 1. All other alignments\
\ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\
\ in the BAM file should have weight 0.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--output_genome_bam"
description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\
\ to genomic coordinates and \nannotated with their posterior probabilities.\
\ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\
\ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\
\ \nwill be generated.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--sort_bam_by_coordinate"
description: "Sort RSEM generated transcript and genome BAM files by coordinates\
\ and build associated indices.\n"
info: null
direction: "input"
- name: "Basic Options"
arguments:
- type: "boolean_true"
name: "--no_qualities"
description: "Input reads do not contain quality scores."
info: null
direction: "input"
- type: "boolean_true"
name: "--alignments"
description: "Input file contains alignments in SAM/BAM/CRAM format. The exact\
\ file format will be determined \nautomatically.\n"
info: null
direction: "input"
- type: "file"
name: "--fai"
description: "If the header section of input alignment file does not contain reference\
\ sequence information, \nthis option should be turned on. <file> is a FAI format\
\ file containing each reference sequence's \nname and length. Please refer\
\ to the SAM official website for the details of FAI format.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bowtie2"
description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\
\ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\
\ are set in a way to avoid those alignments. In \nparticular, we use options\
\ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\
\ \nby default. The last parameter of '--score_min', '-0.1', is the negative\
\ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\
\ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star"
description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\
\ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\
\ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\
\ with name as 'sample_name.bam'. Each STAR job will have its own private copy\
\ of \nthe genome in memory.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--hisat2_hca"
description: "Use HISAT2 to align reads to the transcriptome according to Human\
\ Cell Atlas.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--append_names"
description: "If gene_name/transcript_name is available, append it to the end\
\ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\
\ and 'sample_name.genes.results'.\n"
info: null
direction: "input"
- type: "integer"
name: "--seed"
description: "Set the seed for the random number generators used in calculating\
\ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\
\ 32 bit integer.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--single_cell_prior"
description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\
\ mean estimates and credibility \nintervals. However, much less genes are expressed\
\ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\
\ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\
\ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\
\ as the prior which \nencourage the sparsity of the expression levels.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--calc_pme"
description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates."
info: null
direction: "input"
- type: "boolean_true"
name: "--calc_ci"
description: "Calculate 95% credibility intervals and posterior mean estimates.\
\ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress the output of logging information."
info: null
direction: "input"
- name: "Aligner Options"
arguments:
- type: "integer"
name: "--seed_length"
description: "Seed length used by the read aligner. Providing the correct value\
\ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\
\ seed length parameter. Any read with its or at least \none of its mates' (for\
\ paired-end reads) length less than this value will be ignored. If the \nreferences\
\ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\
\ minimum \nallowed value is 25. Note that this script will only check if the\
\ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\
\ 25)\n"
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--phred64_quals"
description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\
\ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\
\ quality score will be encoded as Phred+33. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--solexa_quals"
description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\
\ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\
\ score will be encoded as Phred+33. (Default: false)\n"
info: null
direction: "input"
- type: "integer"
name: "--bowtie_n"
description: "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,\
\ Default: 2)\n"
info: null
example:
- 2
required: false
choices:
- 0
- 1
- 2
- 3
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_e"
description: "(Bowtie parameter) max sum of mismatch quality scores across the\
\ alignment. (Default: 99999999)\n"
info: null
example:
- 99999999
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_m"
description: "(Bowtie parameter) suppress all alignments for a read if > <int>\
\ valid alignments exist. (Default: 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_chunkmbs"
description: "(Bowtie parameter) memory allocated for best first alignment calculation\
\ (Default: 0 - use Bowtie's default)\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--bowtie2_mismatch_rate"
description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\
\ 0.1)\n"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie2_k"
description: "(Bowtie 2 parameter) Find up to <int> alignments per read. (Default:\
\ 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--bowtie2_sensitivity_level"
description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\
\ mode. This option controls how \nhard Bowtie 2 tries to find alignments. <string>\
\ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\
. The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\
\ \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\"\
\ - use Bowtie 2's default)\n"
info: null
example:
- "sensitive"
required: false
choices:
- "very_fast"
- "fast"
- "sensitive"
- "very_sensitive"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--star_gzipped_read_file"
description: "Input read file(s) is compressed by gzip. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star_bzipped_read_file"
description: "Input read file(s) is compressed by bzip2. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star_output_genome_bam"
description: "Save the BAM file from STAR alignment under genomic coordinate to\
\ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\
\ In this file, according to STAR's manual, 'paired \nends of an alignment are\
\ always adjacent, and multiple alignments of a read are adjacent as well'.\
\ \n(Default: false)\n"
info: null
direction: "input"
- name: "Advanced Options"
arguments:
- type: "string"
name: "--tag"
description: "The name of the optional field used in the SAM input for identifying\
\ a read with too many valid \nalignments. The field should have the format\
\ <tagName>:i:<value>, where a <value> bigger than 0 \nindicates a read with\
\ too many alignments. (Default: \"\")\n"
info: null
example:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_min"
description: "Minimum read/insert length allowed. This is also the value for the\
\ Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_max"
description: "Maximum read/insert length allowed. This is also the value for the\
\ Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_mean"
description: "(single-end data only) The mean of the fragment length distribution,\
\ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\
\ fragment length distribution)\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--gragment_length_sd"
description: "(single-end data only) The standard deviation of the fragment length\
\ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\
\ that all fragments are of the same length, \ngiven by the rounded value of\
\ --fragment_length_mean).\n"
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--estimate_rspd"
description: "Set this option if you want to estimate the read start position\
\ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
info: null
direction: "input"
- type: "integer"
name: "--num_rspd_bins"
description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\
\ is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_burnin"
description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\
\ passes over the entire data set \nonce. If RSEM can use multiple threads,\
\ multiple Gibbs samplers will start at the same time and all \nsamplers share\
\ the same burn-in number. (Default: 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_number_of_samples"
description: "The total number of count vectors RSEM will collect from its Gibbs\
\ samplers. (Default: 1000)\n"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_sampling_gap"
description: "The number of rounds between two successive count vectors RSEM collects.\
\ If the count vector after \nround N is collected, the count vector after round\
\ N + <int> will also be collected. (Default: 1)\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--ci_credibility_level"
description: "The credibility level for credibility intervals. (Default: 0.95)\n"
info: null
example:
- 0.95
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--ci_number_of_samples_per_count_vector"
description: "The number of read generating probability vectors sampled per sampled\
\ count vector. The credibility \nintervals are calculated by first sampling P(C\
\ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\
\ option controls how many Theta vectors are sampled per sampled count vector.\
\ \n(Default: 50)\n"
info: null
example:
- 50
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--keep_intermediate_files"
description: "Keep temporary files generated by RSEM. RSEM creates a temporary\
\ directory, 'sample_name.temp', \ninto which it puts all intermediate output\
\ files. If this directory already exists, RSEM overwrites \nall files generated\
\ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\
temporary directory is deleted. Set this option to prevent the deletion of this\
\ directory and the \nintermediate files inside of it.\n"
info: null
direction: "input"
- type: "string"
name: "--temporary_folder"
description: "Set where to put the temporary files generated by RSEM. If the folder\
\ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
info: null
example:
- "sample_name.temp"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--time"
description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n"
info: null
direction: "input"
- name: "Prior-Enhanced RSEM Options"
arguments:
- type: "boolean_true"
name: "--run_pRSEM"
description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\
\ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\
\ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\
\ peak information to partition isoforms (e.g. in pRSEM's default \npartition\
\ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\
\ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\
\ are required (with the \n'--chipseq_target_read_files <string>', '--chipseq_control_read_files\
\ <string>', and '--bowtie_path \n<path> options), otherwise, ChIP-seq FASTQ\
\ files for target and control and the path to Bowtie \nexecutables are required.\n"
info: null
direction: "input"
- type: "file"
name: "--chipseq_peak_file"
description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\
\ format. This file is used \nwhen running prior-enhanced RSEM in the default\
\ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\
\ overlapping with their transcription start site region or not. Each \npartition\
\ will have its own prior parameter learned from a training set. This file can\
\ be either \ngzipped or ungzipped.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_target_read_files"
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\
\ This option is used when running \nprior-enhanced RSEM. It provides information\
\ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\
\ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>'\
\ \nand '--chipseq_control_read_files <string>' must be defined when this option\
\ is specified.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_control_read_files"
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq control.\
\ This option is used when running \nprior-enhanced RSEM. It provides information\
\ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\
\ a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>' and \n'--chipseq_target_read_files\
\ <string>' must be defined when this option is specified.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_read_files_multi_targets"
description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\
\ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\
\ is learned from multiple complementary data sets. It provides \ninformation\
\ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\
\ with a suffix \n'.gz' or '.gzip'. When this option is specified, the option\
\ '--bowtie_path <path>' must be defined and \nthe option '--partition_model\
\ <string>' will be set to 'cmb_lgt' automatically.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_bed_files_multi_targets"
description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\
\ This option is used when running \nprior-enhanced RSEM, where prior is learned\
\ from multiple complementary data sets. It provides information \nof ChIP-seq\
\ signals and must have at least the first six BED columns. All files can be\
\ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\
\ is specified, the option '--partition_model \n<string>' will be set to 'cmb_lgt'\
\ automatically.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--cap_stacked_chipseq_reads"
description: "Keep a maximum number of ChIP-seq reads that aligned to the same\
\ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\
\ where prior is learned from multiple complementary data sets. This \noption\
\ is only in use when either '--chipseq_read_files_multi_targets <string>' or\
\ \n'--chipseq_bed_files_multi_targets <string>' is specified.\n"
info: null
direction: "input"
- type: "integer"
name: "--n_max_stacked_chipseq_reads"
description: "The maximum number of stacked ChIP-seq reads to keep. This option\
\ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\
\ complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads'\
\ is set.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--partition_model"
description: "A keyword to specify the partition model used by prior-enhanced\
\ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\
\ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\
\ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\
\ models are learned from a training set. For detailed explanations, please\
\ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n"
info: null
example:
- "pk"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Calculate expression with RSEM. \n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Transcriptome"
- "Index"
- "Alignment"
- "RSEM"
license: "GPL-3.0"
references:
doi:
- "https://doi.org/10.1186/1471-2105-12-323"
links:
repository: "https://github.com/deweylab/RSEM"
homepage: "https://deweylab.github.io/RSEM/"
documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "build-essential"
- "gcc"
- "g++"
- "make"
- "wget"
- "zlib1g-dev"
- "unzip"
interactive: false
- type: "docker"
run:
- "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\
\ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\
\ /usr/local/bin && \\\ncd && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\
\ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n"
# Versions recorded in the image environment. These must mirror the releases
# that the preceding `run` step actually downloads (STAR 2.7.11a, RSEM 1.3.3).
env:
- "STAR_VERSION=2.7.11a"
- "RSEM_VERSION=1.3.3"
- type: "docker"
run:
- "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\
\ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\
\ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\
\ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\
\ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\
d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\
\ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Provenance metadata written by the Viash build for this component.
build_info:
config: "src/rsem/rsem_calculate_expression/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/rsem/rsem_calculate_expression"
executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf"
viash_version: "0.9.0"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
# Remote is recorded without credentials: access tokens must never be embedded
# in published build metadata.
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "biobox"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

View File

@@ -3,7 +3,7 @@ manifest {
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Calculate expression with RSEM.\n'
description = 'Calculate expression with RSEM. \n'
}
process.container = 'nextflow/bash:latest'

View File

@@ -0,0 +1,839 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "rsem_calculate_expression",
"description": "Calculate expression with RSEM. \n",
"type": "object",
"definitions": {
"input" : {
"title": "Input",
"type": "object",
"description": "No description",
"properties": {
"id": {
"type":
"string",
"description": "Type: `string`. Sample ID",
"help_text": "Type: `string`. Sample ID."
}
,
"strandedness": {
"type":
"string",
"description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity",
"help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, reverse",
"enum": ["forward", "reverse", "unstranded"]
}
,
"paired": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Paired-end reads or not?",
"help_text": "Type: `boolean_true`, default: `false`. Paired-end reads or not?"
,
"default": "False"
}
,
"input": {
"type":
"string",
"description": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification",
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification."
}
,
"index": {
"type":
"string",
"description": "Type: `file`. RSEM index",
"help_text": "Type: `file`. RSEM index."
}
,
"extra_args": {
"type":
"string",
"description": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples",
"help_text": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples."
}
}
},
"output" : {
"title": "Output",
"type": "object",
"description": "No description",
"properties": {
"counts_gene": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level",
"help_text": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level"
,
"default": "$id.$key.counts_gene.results"
}
,
"counts_transcripts": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level",
"help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level"
,
"default": "$id.$key.counts_transcripts.results"
}
,
"stat": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics",
"help_text": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics"
,
"default": "$id.$key.stat.stat"
}
,
"logs": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs",
"help_text": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs"
,
"default": "$id.$key.logs.log"
}
,
"bam_star": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)",
"help_text": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)"
,
"default": "$id.$key.bam_star.bam"
}
,
"bam_genome": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)",
"help_text": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)"
,
"default": "$id.$key.bam_genome.bam"
}
,
"bam_transcript": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)",
"help_text": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)"
,
"default": "$id.$key.bam_transcript.bam"
}
,
"sort_bam_by_read_name": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordinate by read name",
"help_text": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordinate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n"
,
"default": "False"
}
,
"no_bam_output": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Do not output any BAM file",
"help_text": "Type: `boolean_true`, default: `false`. Do not output any BAM file."
,
"default": "False"
}
,
"sampling_for_bam": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities",
"help_text": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \"noise\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n"
,
"default": "False"
}
,
"output_genome_bam": {
"type":
"boolean",
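"description": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name.genome.bam\u0027, with alignments mapped to genomic coordinates and annotated with their posterior probabilities",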
"help_text": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name.genome.bam\u0027, with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. \u0027sample_name.genome.sorted.bam\u0027 and \u0027sample_name.genome.sorted.bam.bai\u0027 \nwill be generated.\n"
,
"default": "False"
}
,
"sort_bam_by_coordinate": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices",
"help_text": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n"
,
"default": "False"
}
}
},
"basic options" : {
"title": "Basic Options",
"type": "object",
"description": "No description",
"properties": {
"no_qualities": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores",
"help_text": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores."
,
"default": "False"
}
,
"alignments": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format",
"help_text": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n"
,
"default": "False"
}
,
"fai": {
"type":
"string",
"description": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on",
"help_text": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. \u003cfile\u003e is a FAI format file containing each reference sequence\u0027s \nname and length. Please refer to the SAM official website for the details of FAI format.\n"
}
,
"bowtie2": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads",
"help_text": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options \u0027--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1\u0027 \nby default. The last parameter of \u0027--score_min\u0027, \u0027-0.1\u0027, is the negative of maximum mismatch rate. \nThis rate can be set by option \u0027--bowtie2_mismatch_rate\u0027. If reads are paired-end, we additionally \nuse options \u0027--no_mixed\u0027 and \u0027--no_discordant\u0027.\n"
,
"default": "False"
}
,
"star": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Use STAR to align reads",
"help_text": "Type: `boolean_true`, default: `false`. Use STAR to align reads. Alignment parameters are from ENCODE3\u0027s STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR\u0027s Output BAM file is unsorted. It is stored in RSEM\u0027s \ntemporary directory with name as \u0027sample_name.bam\u0027. Each STAR job will have its own private copy of \nthe genome in memory.\n"
,
"default": "False"
}
,
"hisat2_hca": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlas",
"help_text": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlas.\n"
,
"default": "False"
}
,
"append_names": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name.isoforms.results\u0027 and \u0027sample_name.genes.results\u0027",
"help_text": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name.isoforms.results\u0027 and \u0027sample_name.genes.results\u0027.\n"
,
"default": "False"
}
,
"seed": {
"type":
"integer",
"description": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals",
"help_text": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n"
}
,
"single_cell_prior": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals",
"help_text": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, far fewer genes are expressed in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which \nencourages the sparsity of the expression levels.\n"
,
"default": "False"
}
,
"calc_pme": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates",
"help_text": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates."
,
"default": "False"
}
,
"calc_ci": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates",
"help_text": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting \u0027--ci_credibility_level\u0027.\n"
,
"default": "False"
}
,
"quiet": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Suppress the output of logging information",
"help_text": "Type: `boolean_true`, default: `false`. Suppress the output of logging information."
,
"default": "False"
}
}
},
"aligner options" : {
"title": "Aligner Options",
"type": "object",
"description": "No description",
"properties": {
"seed_length": {
"type":
"integer",
"description": "Type: `integer`, example: `25`. Seed length used by the read aligner",
"help_text": "Type: `integer`, example: `25`. Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie\u0027s seed length parameter. Any read with its or at least \none of its mates\u0027 (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value \u003e= 5 and give a warning \nmessage if the value \u003c 25 but \u003e= 5. (Default: 25)\n"
}
,
"phred64_quals": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. \u003e= 1.3)",
"help_text": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. \u003e= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n"
,
"default": "False"
}
,
"solexa_quals": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver. \u003c 1.3)",
"help_text": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver. \u003c 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n"
,
"default": "False"
}
,
"bowtie_n": {
"type":
"integer",
"description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed",
"help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n",
"enum": [0, 1, 2, 3]
}
,
"bowtie_e": {
"type":
"integer",
"description": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment",
"help_text": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n"
}
,
"bowtie_m": {
"type":
"integer",
"description": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist",
"help_text": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist. (Default: 200)\n"
}
,
"bowtie_chunkmbs": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n",
"help_text": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n"
}
,
"bowtie2_mismatch_rate": {
"type":
"number",
"description": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed",
"help_text": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n"
}
,
"bowtie2_k": {
"type":
"integer",
"description": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read",
"help_text": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read. (Default: 200)\n"
}
,
"bowtie2_sensitivity_level": {
"type":
"string",
"description": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode",
"help_text": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. \u003cstring\u003e must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\". The four candidates correspond to Bowtie 2\u0027s \"--very-fast\", \"--fast\", \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\" - use Bowtie 2\u0027s default)\n",
"enum": ["very_fast", "fast", "sensitive", "very_sensitive"]
}
,
"star_gzipped_read_file": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip",
"help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip. (Default: false)\n"
,
"default": "False"
}
,
"star_bzipped_read_file": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2",
"help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2. (Default: false)\n"
,
"default": "False"
}
,
"star_output_genome_bam": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name.STAR.genome.bam\u0027",
"help_text": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name.STAR.genome.bam\u0027. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR\u0027s manual, \u0027paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well\u0027. \n(Default: false)\n"
,
"default": "False"
}
}
},
"advanced options" : {
"title": "Advanced Options",
"type": "object",
"description": "No description",
"properties": {
"tag": {
"type":
"string",
"description": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments",
"help_text": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format \u003ctagName\u003e:i:\u003cvalue\u003e, where a \u003cvalue\u003e bigger than 0 \nindicates a read with too many alignments. (Default: \"\")\n"
}
,
"fragment_length_min": {
"type":
"integer",
"description": "Type: `integer`, example: `1`. Minimum read/insert length allowed",
"help_text": "Type: `integer`, example: `1`. Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
}
,
"fragment_length_max": {
"type":
"integer",
"description": "Type: `integer`, example: `1000`. Maximum read/insert length allowed",
"help_text": "Type: `integer`, example: `1000`. Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
}
,
"fragment_length_mean": {
"type":
"integer",
"description": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian",
"help_text": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n"
}
,
"gragment_length_sd": {
"type":
"number",
"description": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian",
"help_text": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n"
}
,
"estimate_rspd": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data",
"help_text": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
,
"default": "False"
}
,
"num_rspd_bins": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. Number of bins in the RSPD",
"help_text": "Type: `integer`, example: `20`. Number of bins in the RSPD. Only relevant when \u0027--estimate_rspd\u0027 is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
}
,
"gibbs_burnin": {
"type":
"integer",
"description": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler",
"help_text": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. (Default: 200)\n"
}
,
"gibbs_number_of_samples": {
"type":
"integer",
"description": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers",
"help_text": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n"
}
,
"gibbs_sampling_gap": {
"type":
"integer",
"description": "Type: `integer`, example: `1`. The number of rounds between two successive count vectors RSEM collects",
"help_text": "Type: `integer`, example: `1`. The number of rounds between two successive count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + \u003cint\u003e will also be collected. (Default: 1)\n"
}
,
"ci_credibility_level": {
"type":
"number",
"description": "Type: `double`, example: `0.95`. The credibility level for credibility intervals",
"help_text": "Type: `double`, example: `0.95`. The credibility level for credibility intervals. (Default: 0.95)\n"
}
,
"ci_number_of_samples_per_count_vector": {
"type":
"integer",
"description": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector",
"help_text": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector. The credibility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n"
}
,
"keep_intermediate_files": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM",
"help_text": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM. RSEM creates a temporary directory, \u0027sample_name.temp\u0027, \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n"
,
"default": "False"
}
,
"temporary_folder": {
"type":
"string",
"description": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM",
"help_text": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
}
,
"time": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027",
"help_text": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027.\n"
,
"default": "False"
}
}
},
"prior-enhanced rsem options" : {
"title": "Prior-Enhanced RSEM Options",
"type": "object",
"description": "No description",
"properties": {
"run_pRSEM": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM)",
"help_text": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform\u0027s initial pseudo-count for \nRSEM\u0027s Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. in pRSEM\u0027s default \npartition model), either ChIP-seq peak file (with the \u0027--chipseq_peak_file\u0027 option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027, \u0027--chipseq_control_read_files \u003cstring\u003e\u0027, and \u0027--bowtie_path \n\u003cpath\u003e\u0027 options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n"
,
"default": "False"
}
,
"chipseq_peak_file": {
"type":
"string",
"description": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format",
"help_text": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n"
}
,
"chipseq_target_read_files": {
"type":
"string",
"description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target",
"help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 \nand \u0027--chipseq_control_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n"
}
,
"chipseq_control_read_files": {
"type":
"string",
"description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq control",
"help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq control. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 and \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n"
}
,
"chipseq_read_files_multi_targets": {
"type":
"string",
"description": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets",
"help_text": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n\u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--bowtie_path \u003cpath\u003e\u0027 must be defined and \nthe option \u0027--partition_model \u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n"
}
,
"chipseq_bed_files_multi_targets": {
"type":
"string",
"description": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets",
"help_text": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--partition_model \n\u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n"
}
,
"cap_stacked_chipseq_reads": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval",
"help_text": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either \u0027--chipseq_read_files_multi_targets \u003cstring\u003e\u0027 or \n\u0027--chipseq_bed_files_multi_targets \u003cstring\u003e\u0027 is specified.\n"
,
"default": "False"
}
,
"n_max_stacked_chipseq_reads": {
"type":
"integer",
"description": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep",
"help_text": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption \u0027--cap_stacked_chipseq_reads\u0027 is set.\n"
}
,
"partition_model": {
"type":
"string",
"description": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM",
"help_text": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM\u0027s paper. (Default: \u0027pk\u0027)\n"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/basic options"
},
{
"$ref": "#/definitions/aligner options"
},
{
"$ref": "#/definitions/advanced options"
},
{
"$ref": "#/definitions/prior-enhanced rsem options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -1,12 +1,28 @@
name: "rseqc_bamstat"
namespace: "rseqc"
version: "main"
authors:
- name: "Emma Rousseau"
roles:
- "author"
- "maintainer"
info:
links:
email: "emma@data-intuitive.com"
github: "emmarousseau"
linkedin: "emmarousseau1"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Bioinformatician"
argument_groups:
- name: "Input"
arguments:
- type: "file"
name: "--input"
description: "input alignment file in BAM or SAM format"
name: "--input_file"
alternatives:
- "-i"
description: "Input alignment file in BAM or SAM format."
info: null
must_exist: true
create_parent: true
@@ -15,14 +31,15 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--map_qual"
name: "--mapq"
alternatives:
- "-q"
description: "Minimum mapping quality (phred scaled) to determine uniquely mapped\
\ reads, default=30."
\ reads. Default: '30'.\n"
info: null
default:
example:
- 30
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
@@ -30,10 +47,8 @@ argument_groups:
arguments:
- type: "file"
name: "--output"
description: "output file (txt) with mapping quality statistics"
description: "Output file (txt) with mapping quality statistics."
info: null
default:
- "$id.mapping_quality.txt"
must_exist: true
create_parent: true
required: false
@@ -44,32 +59,30 @@ resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Generate statistics from a bam file.\n"
description: "Generate statistics from a bam file."
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
- type: "file"
path: "test.paired_end.sorted.bam"
info:
migration_info:
git_repo: "https://github.com/nf-core/rnaseq.git"
paths:
- "modules/nf-core/rseqc/bamstat/main.nf"
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
keywords:
- "rnaseq"
- "genomics"
license: "GPL-3.0"
references:
doi:
- "10.1093/bioinformatics/bts356"
links:
repository: "https://github.com/MonashBioinformaticsPlatform/RSeQC"
homepage: "https://rseqc.sourceforge.net/"
documentation: "https://rseqc.sourceforge.net/#bam-stat-py"
issue_tracker: "https://github.com/MonashBioinformaticsPlatform/RSeQC/issues"
runners:
- type: "executable"
id: "executable"
@@ -138,20 +151,19 @@ runners:
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
image: "python:3.10"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "python3-pip"
interactive: false
- type: "python"
user: false
packages:
- "RSeQC"
upgrade: true
- type: "docker"
run:
- "echo \"RSeQC bam_stat.py: $(bam_stat.py --version | cut -d' ' -f2-)\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
@@ -163,31 +175,28 @@ build_info:
output: "target/nextflow/rseqc/rseqc_bamstat"
executable: "target/nextflow/rseqc/rseqc_bamstat/main.nf"
viash_version: "0.9.0"
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
git_tag: "v0.2.0-26-ga13b57d"
package_config:
name: "rnaseq"
version: "main"
info:
test_resources:
- path: "gs://viash-hub-test-data/rnaseq/v1"
dest: "testData"
repositories:
- type: "vsh"
name: "biobox"
repo: "vsh/biobox"
tag: "main"
- type: "vsh"
name: "craftbox"
repo: "craftbox"
tag: "v0.1.0"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"

Some files were not shown because too many files have changed in this diff Show More