Build branch main with version main (0c8a7eb)
Build pipeline: viash-hub.rnaseq.main-nn8dl
Source commit: 0c8a7eb648
Source message: remove citation
This commit is contained in:
@@ -17,7 +17,7 @@ config_mods: |
|
|||||||
repositories:
|
repositories:
|
||||||
- name: biobox
|
- name: biobox
|
||||||
type: vsh
|
type: vsh
|
||||||
repo: vsh/biobox
|
repo: biobox
|
||||||
tag: main
|
tag: main
|
||||||
- name: craftbox
|
- name: craftbox
|
||||||
type: vsh
|
type: vsh
|
||||||
|
|||||||
@@ -1,89 +0,0 @@
|
|||||||
name: "bbmap_bbsplit"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/bbmap/bbsplit/main.nf, modules/nf-core/bbmap/bbsplit/meta.yml]
|
|
||||||
last_sha: 277bd337739a8b8f753fa7b5eda6743b9b6acb89
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Split sequencing reads by mapping them to multiple references simultaneously.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--id"
|
|
||||||
type: string
|
|
||||||
description: Sample ID
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
default: false
|
|
||||||
description: Paired fastq files or not?
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ","
|
|
||||||
description: Input fastq files, either one or two (paired)
|
|
||||||
example: sample.fastq
|
|
||||||
- name: "--primary_ref"
|
|
||||||
type: file
|
|
||||||
description: Primary reference FASTA
|
|
||||||
- name: "--bbsplit_fasta_list"
|
|
||||||
type: file
|
|
||||||
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit.
|
|
||||||
- name: "--only_build_index"
|
|
||||||
type: boolean
|
|
||||||
description: true = only build index; false = mapping
|
|
||||||
- name: "--built_bbsplit_index"
|
|
||||||
type: file
|
|
||||||
description: Directory with index files
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--fastq_1"
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Output file for read 1.
|
|
||||||
direction: output
|
|
||||||
must_exist: false
|
|
||||||
default: $id.$key.read_1.fastq
|
|
||||||
- name: "--fastq_2"
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
description: Output file for read 2.
|
|
||||||
direction: output
|
|
||||||
default: $id.$key.read_2.fastq
|
|
||||||
- name: "--bbsplit_index"
|
|
||||||
type: file
|
|
||||||
description: Directory with index files
|
|
||||||
direction: output
|
|
||||||
must_exist: false
|
|
||||||
default: BBSplit_index
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/reference/genome.fasta
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
|
|
||||||
- path: /testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa
|
|
||||||
- path: /testData/minimal_test/reference/bbsplit_fasta/human.fa
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: docker
|
|
||||||
run: |
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y build-essential openjdk-17-jdk wget tar && \
|
|
||||||
wget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \
|
|
||||||
tar xzf BBMap_39.01.tar.gz && \
|
|
||||||
cp -r bbmap/* /usr/local/bin
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
#!/bin/bash

# BBSplit wrapper script.
#
# Operates in one of two modes, selected by $par_only_build_index:
#   true  - build a BBSplit index at $par_bbsplit_index from $par_primary_ref
#           plus the references listed in $par_bbsplit_fasta_list.
#   false - map the reads in $par_input (comma-separated, one or two files)
#           and copy the reads binned to the primary reference to
#           $par_fastq_1 (and $par_fastq_2 when $par_paired is true).
#
# All inputs are read from par_* / meta_* environment variables.

set -eo pipefail

# Scratch directory; only created in mapping mode.
tmpdir=""

# Remove the scratch directory if one was created.
function clean_up {
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap clean_up EXIT

# Print an error message on stderr and abort with a non-zero status.
function die {
  echo "ERROR: $*" >&2
  exit 1
}

# JVM heap size in MB, passed to bbsplit.sh as -Xmx.
avail_mem=3072

# Without a prebuilt index, turn every "<name>,<path>" line of the fasta list
# into a "ref_<name>=<path>" argument. A missing list simply leaves the array
# empty so that the mode-specific checks below can report a readable error.
other_refs=()
if [[ ! -d "$par_built_bbsplit_index" && -f "$par_bbsplit_fasta_list" ]]; then
  # "|| [[ -n $name ]]" keeps a last line that lacks a trailing newline.
  while IFS="," read -r name path || [[ -n "$name" ]]; do
    [[ -n "$name" ]] || continue   # skip blank lines
    other_refs+=("ref_$name=$path")
  done < "$par_bbsplit_fasta_list"
fi

# NOTE: an unset $par_only_build_index selects index building. This keeps the
# behaviour of the previous `if $par_only_build_index; then` test, where an
# empty expansion evaluated as success.
if [[ "${par_only_build_index:-true}" == "true" ]]; then
  if [[ ! -f "$par_primary_ref" || ${#other_refs[@]} -eq 0 ]]; then
    die "Please specify as input a primary fasta file along with names and paths to non-primary fasta files."
  fi

  bbsplit.sh \
    "-Xmx${avail_mem}M" \
    "ref_primary=$par_primary_ref" "${other_refs[@]}" \
    "path=$par_bbsplit_index" \
    "threads=${meta_cpus:-1}"
else
  IFS="," read -ra input <<< "$par_input"

  # Either reuse a prebuilt index or let bbsplit.sh index the references.
  index_args=()
  if [[ -d "$par_built_bbsplit_index" ]]; then
    index_args=("path=$par_built_bbsplit_index")
  elif [[ -f "$par_primary_ref" && ${#other_refs[@]} -gt 0 ]]; then
    index_args=("ref_primary=$par_primary_ref" "${other_refs[@]}")
  else
    die "Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files."
  fi

  tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

  if [[ "$par_paired" == "true" ]]; then
    if [[ ${#input[@]} -lt 2 ]]; then
      die "Paired-end mode requires two input files."
    fi

    # In the basename pattern, % and # are placeholders filled in by
    # bbsplit.sh; the files looked up below are named primary_1* / primary_2*.
    bbsplit.sh \
      "-Xmx${avail_mem}M" \
      "${index_args[@]}" \
      "threads=${meta_cpus:-1}" \
      "in=${input[0]}" \
      "in2=${input[1]}" \
      "basename=${tmpdir}/%_#.fastq" \
      refstats=bbsplit_stats.txt

    # Patterns are quoted so that find, not the shell, expands them.
    read1=$(find "$tmpdir/" -iname 'primary_1*')
    read2=$(find "$tmpdir/" -iname 'primary_2*')
    if [[ -z "$read1" || -z "$read2" ]]; then
      die "BBSplit did not produce reads for the primary reference."
    fi
    cp -- "$read1" "$par_fastq_1"
    cp -- "$read2" "$par_fastq_2"
  else
    if [[ ${#input[@]} -lt 1 ]]; then
      die "No input file given."
    fi

    bbsplit.sh \
      "-Xmx${avail_mem}M" \
      "${index_args[@]}" \
      "threads=${meta_cpus:-1}" \
      "in=${input[0]}" \
      "basename=${tmpdir}/%.fastq" \
      refstats=bbsplit_stats.txt

    read1=$(find "$tmpdir/" -iname 'primary*')
    if [[ -z "$read1" ]]; then
      die "BBSplit did not produce reads for the primary reference."
    fi
    cp -- "$read1" "$par_fastq_1"
  fi
fi
|
|
||||||
@@ -1,86 +0,0 @@
|
|||||||
#!/bin/bash

# Integration test for the BBSplit component: builds an index, then filters
# single-end and paired-end reads, first from FASTA references and then from
# the prebuilt index.

# Abort the test run unless "$1" is a directory with at least one entry.
# "$2" is the noun used in the failure message.
assert_populated_dir() {
  local target="$1" label="$2"
  if [ ! -d "$target" ]; then
    echo "$label does not exist!"
    exit 1
  fi
  if [ -z "$(ls -A "$target")" ]; then
    echo "$label is empty!"
    exit 1
  fi
}

# Abort the test run unless "$1" is a regular file with non-zero size.
# "$2" is the noun used in the failure message.
assert_populated_file() {
  local target="$1" label="$2"
  if [ ! -f "$target" ]; then
    echo "$label does not exist!"
    exit 1
  fi
  if [ ! -s "$target" ]; then
    echo "$label is empty!"
    exit 1
  fi
}

echo ">>> Test $meta_functionality_name"

ref_list="bbsplit_fasta_list.txt"
index_dir="BBSplit_index"
primary_fasta="$meta_resources_dir/genome.fasta"
reads_1="$meta_resources_dir/SRR6357070_1.fastq.gz"
reads_2="$meta_resources_dir/SRR6357070_2.fastq.gz"
filtered_1="filtered_SRR6357070_1.fastq.gz"
filtered_2="filtered_SRR6357070_2.fastq.gz"

# One "<name>,<path>" line per non-primary reference.
printf '%s\n' \
  "sarscov2,$meta_resources_dir/sarscov2.fa" \
  "human,$meta_resources_dir/human.fa" \
  > "$ref_list"

echo ">>> Building BBSplit index"
"$meta_executable" \
  --primary_ref "$primary_fasta" \
  --bbsplit_fasta_list "$ref_list" \
  --only_build_index true \
  --bbsplit_index "$index_dir"

echo ">>> Check whether output exists"
assert_populated_dir "$index_dir" "BBSplit index"

echo ">>> Filtering ribosomal RNA reads"

echo ">>> Testing with single-end reads and primary/non-primary FASTA files"
"$meta_executable" \
  --paired false \
  --input "$reads_1" \
  --only_build_index false \
  --primary_ref "$primary_fasta" \
  --bbsplit_fasta_list "$ref_list" \
  --fastq_1 "$filtered_1"

echo ">>> Check whether output exists"
assert_populated_file "$filtered_1" "Filtered reads file"

rm "$filtered_1"

echo ">>> Testing with paired-end reads and primary/non-primary FASTA files"
"$meta_executable" \
  --paired true \
  --input "$reads_1,$reads_2" \
  --only_build_index false \
  --primary_ref "$primary_fasta" \
  --bbsplit_fasta_list "$ref_list" \
  --fastq_1 "$filtered_1" \
  --fastq_2 "$filtered_2"

echo ">>> Check whether output exists"
assert_populated_file "$filtered_1" "Filtered read 1 file"
assert_populated_file "$filtered_2" "Filtered read 2 file"

rm "$filtered_1" "$filtered_2"

echo ">>> Testing with single-end reads and BBSplit index"
"$meta_executable" \
  --paired false \
  --input "$reads_1" \
  --only_build_index false \
  --built_bbsplit_index "$index_dir" \
  --fastq_1 "$filtered_1"

echo ">>> Check whether output exists"
assert_populated_file "$filtered_1" "Filtered reads file"

echo ">>> Testing with paired-end reads and BBSplit index"
"$meta_executable" \
  --paired true \
  --input "$reads_1,$reads_2" \
  --only_build_index false \
  --built_bbsplit_index "$index_dir" \
  --fastq_1 "$filtered_1" \
  --fastq_2 "$filtered_2"

echo ">>> Check whether output exists"
assert_populated_file "$filtered_1" "Filtered read 1 file"
assert_populated_file "$filtered_2" "Filtered read 2 file"

rm "$filtered_1" "$filtered_2"

echo "All tests succeeded!"
exit 0
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
name: "fastqc"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/fastqc/main.nf, modules/nf-core/fastqc/meta.yml]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
required: false
|
|
||||||
default: false
|
|
||||||
description: Paired fastq files or not?
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ","
|
|
||||||
description: Input fastq files, either one or two (paired)
|
|
||||||
example: sample.fastq
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--fastqc_html_1"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
description: FastQC HTML report for read 1.
|
|
||||||
default: $id.read_1.fastqc.html
|
|
||||||
- name: "--fastqc_html_2"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
description: FastQC HTML report for read 2.
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.read_2.fastqc.html
|
|
||||||
- name: "--fastqc_zip_1"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
description: FastQC report archive for read 1.
|
|
||||||
default: $id.read_1.fastqc.zip
|
|
||||||
- name: "--fastqc_zip_2"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
description: FastQC report archive for read 2.
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.read_2.fastqc.zip
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [ fastqc ]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
#!/bin/bash

# FastQC wrapper script.
#
# Runs fastqc on the comma-separated files in $par_input (one file, or two
# when $par_paired is true) and copies the HTML report and ZIP archive of each
# read to $par_fastqc_html_1/2 and $par_fastqc_zip_1/2.

set -eo pipefail

tmpdir=""

# Remove the scratch directory if one was created.
function clean_up {
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap clean_up EXIT

# Print the prefix FastQC uses for the report files of input path $1.
# The suffix list mirrors the extensions FastQC is understood to strip, in
# order (TODO confirm against the installed FastQC version). For .fastq and
# .fastq.gz inputs the result equals the previous "%.fastq*" rule.
function report_stem {
  local stem ext
  stem=$(basename -- "$1")
  for ext in .gz .bz2 .txt .fastq .fq .csfastq .sam .bam .ubam; do
    stem="${stem%"$ext"}"
  done
  printf '%s\n' "$stem"
}

# Copy report $1 to $2 if FastQC produced it; warn on stderr otherwise.
# Written as an explicit if/else so a missing optional report does not leak
# into the exit status of the script.
function copy_report {
  local src="$1" dest="$2"
  if [[ -e "$src" ]]; then
    cp -- "$src" "$dest"
  else
    echo "WARNING: expected FastQC output '$src' was not produced" >&2
  fi
}

tmpdir=$(mktemp -d "$meta_temp_dir/$meta_name-XXXXXXXX")

IFS="," read -ra input <<< "$par_input"
count=${#input[@]}

# The number of input files has to match the sequencing layout.
if [[ "$par_paired" == "true" ]]; then
  echo "Paired - $count"
  if [[ "$count" -ne 2 ]]; then
    echo "Paired end input requires two files"
    exit 1
  fi
else
  echo "Not Paired - $count"
  if [[ "$count" -ne 1 ]]; then
    echo "Single end input requires one file"
    exit 1
  fi
fi

fastqc -o "$tmpdir" "${input[@]}"

read1=$(report_stem "${input[0]}")
copy_report "${tmpdir}/${read1}_fastqc.html" "$par_fastqc_html_1"
copy_report "${tmpdir}/${read1}_fastqc.zip" "$par_fastqc_zip_1"

if [[ "$par_paired" == "true" ]]; then
  read2=$(report_stem "${input[1]}")
  copy_report "${tmpdir}/${read2}_fastqc.html" "$par_fastqc_html_2"
  copy_report "${tmpdir}/${read2}_fastqc.zip" "$par_fastqc_zip_2"
fi
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
#!/bin/bash

# Integration test for the FastQC component: runs it once on paired-end and
# once on single-end reads and checks that the requested reports exist.

# Print a failure message and abort the test run.
fail() {
  echo "$1"
  exit 1
}

reads_1="$meta_resources_dir/SRR6357070_1.fastq.gz"
reads_2="$meta_resources_dir/SRR6357070_2.fastq.gz"

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"

"$meta_executable" \
  --paired true \
  --input "$reads_1,$reads_2" \
  --fastqc_html_1 SRR6357070_1.html \
  --fastqc_html_2 SRR6357070_2.html \
  --fastqc_zip_1 SRR6357070_1.zip \
  --fastqc_zip_2 SRR6357070_2.zip

echo ">> Checking if the correct files are present"
for report in SRR6357070_1.html SRR6357070_2.html; do
  [[ -f "$report" ]] || fail "Report file missing"
done
for report in SRR6357070_1.html SRR6357070_2.html; do
  [[ -s "$report" ]] || fail "Report file empty"
done
for archive in SRR6357070_1.zip SRR6357070_2.zip; do
  [[ -f "$archive" ]] || fail "Zip file missing"
done

# Remove the paired-end outputs so the single-end checks cannot pass on
# leftovers.
rm SRR6357070_1.html SRR6357070_2.html SRR6357070_1.zip SRR6357070_2.zip

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --paired false \
  --input "$reads_1" \
  --fastqc_html_1 SRR6357070_1.html \
  --fastqc_zip_1 SRR6357070_1.zip

echo ">> Checking if the correct files are present"
[[ -f "SRR6357070_1.html" ]] || fail "Report file missing"
[[ -s "SRR6357070_1.html" ]] || fail "Report file empty"
[[ -f "SRR6357070_1.zip" ]] || fail "Zip file missing"

echo ">>> Test finished successfully"
exit 0
|
|
||||||
@@ -1,66 +0,0 @@
|
|||||||
name: "fq_subsample"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/fq/subsample/main.nf, modules/nf-core/fq/subsample/meta.yml]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in --extra_args
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
description: Input fastq files to subsample
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ";"
|
|
||||||
- name: "--extra_args"
|
|
||||||
type: string
|
|
||||||
default: ""
|
|
||||||
description: Extra arguments to pass to fq subsample
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output_1"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
default: $id.read_1.subsampled.fastq
|
|
||||||
description: Sampled read 1 fastq files
|
|
||||||
- name: "--output_2"
|
|
||||||
type: file
|
|
||||||
must_exist: false
|
|
||||||
direction: output
|
|
||||||
default: $id.read_2.subsampled.fastq
|
|
||||||
description: Sampled read 2 fastq files
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: docker
|
|
||||||
env:
|
|
||||||
- TZ=Europe/Brussels
|
|
||||||
run: |
|
|
||||||
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends build-essential git-all curl && \
|
|
||||||
curl https://sh.rustup.rs -sSf | sh -s -- -y && \
|
|
||||||
. "$HOME/.cargo/env" && \
|
|
||||||
git clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \
|
|
||||||
mv fq /usr/local/ && cd /usr/local/fq && \
|
|
||||||
cargo install --locked --path . && \
|
|
||||||
mv /usr/local/fq/target/release/fq /usr/local/bin/
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
#!/bin/bash

# fq subsample wrapper script.
#
# Subsamples the one or two FASTQ files listed (";"-separated) in $par_input
# and writes the result to $par_output_1 (and $par_output_2 for two inputs).
# Options for fq itself are passed through $par_extra_args, which must select
# a sampling mode: --probability (-p) or --record-count (-n).

set -eo pipefail

IFS=";" read -ra input <<< "$par_input"
n_fastq=${#input[@]}

# $par_extra_args is a whitespace-separated option string; split it into
# words deliberately (read does this without glob expansion).
read -ra extra_args <<< "$par_extra_args"

# The original check was inverted (it aborted when an option WAS present) and
# looked for "--read-count", which is not the name used in the error message
# below or in the component's own test. Scan the words for either sampling
# option, in separate, "=value" or attached-value form.
has_sampling_option=false
for arg in "${extra_args[@]}"; do
  case "$arg" in
    -p|-p?*|--probability|--probability=*|-n|-n?*|--record-count|--record-count=*)
      has_sampling_option=true
      ;;
  esac
done

if [[ "$has_sampling_option" != "true" ]]; then
  echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args" >&2
  exit 1
fi

case "$n_fastq" in
  1)
    fq subsample "${extra_args[@]}" "${input[0]}" \
      --r1-dst "$par_output_1"
    ;;
  2)
    fq subsample "${extra_args[@]}" "${input[0]}" "${input[1]}" \
      --r1-dst "$par_output_1" \
      --r2-dst "$par_output_2"
    ;;
  *)
    echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!" >&2
    exit 1
    ;;
esac
|
|
||||||
@@ -1,32 +0,0 @@
|
|||||||
#!/bin/bash

# Integration test for the fq_subsample component: subsamples paired-end and
# single-end reads and checks that the output files exist and are non-empty.

# Print a failure message and abort the test run.
fail() {
  echo "$1"
  exit 1
}

reads_1="$meta_resources_dir/SRR6357070_1.fastq.gz"
reads_2="$meta_resources_dir/SRR6357070_2.fastq.gz"

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --input "$reads_1;$reads_2" \
  --extra_args '--record-count 1000000 --seed 1' \
  --output_1 SRR6357070_1.subsampled.fastq.gz \
  --output_2 SRR6357070_2.subsampled.fastq.gz

echo ">> Checking if the correct files are present"
[ -f "SRR6357070_1.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file for read 1 is missing!"
[ -s "SRR6357070_1.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file is empty!"
[ -f "SRR6357070_2.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file for read 2 is missing"
[ -s "SRR6357070_2.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file is empty"

# Remove the paired-end outputs so the single-end checks cannot pass on
# leftovers.
rm SRR6357070_1.subsampled.fastq.gz SRR6357070_2.subsampled.fastq.gz

echo ">>> Testing for single-end reads"
# The input path is quoted so a resources directory containing whitespace
# does not split into several arguments.
"$meta_executable" \
  --input "$reads_1" \
  --extra_args '--record-count 1000000 --seed 1' \
  --output_1 SRR6357070_1.subsampled.fastq.gz

echo ">> Checking if the correct files are present"
[ -f "SRR6357070_1.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file is missing"
[ -s "SRR6357070_1.subsampled.fastq.gz" ] || fail "Subsampled FASTQ file is empty"

echo ">>> Tests finished successfully"
exit 0
|
|
||||||
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
name: kallisto_index
|
|
||||||
namespace: kallisto
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/kallisto/index/main.nf, modules/nf-core/kallisto/index/meta.yml]
|
|
||||||
last_sha: c0816976384d5e7ee6079c29c45958df1ffa0ee4
|
|
||||||
description: |
|
|
||||||
Create Kallisto index.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--transcriptome_fasta"
|
|
||||||
type: file
|
|
||||||
- name: "--pseudo_aligner_kmer_size"
|
|
||||||
type: integer
|
|
||||||
description: Kmer length passed to indexing step of pseudoaligners.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--kallisto_index"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
default: Kallisto_index
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/reference/transcriptome.fasta
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: docker
|
|
||||||
run: |
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends wget && \
|
|
||||||
wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
|
|
||||||
tar -xzf kallisto_linux-v0.50.1.tar.gz && \
|
|
||||||
mv kallisto/kallisto /usr/local/bin/
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash

# kallisto index wrapper script.
#
# Builds a Kallisto index at $par_kallisto_index from
# $par_transcriptome_fasta. If $par_pseudo_aligner_kmer_size is set, it is
# passed to kallisto as the k-mer size (-k).

set -eo pipefail

# Optional arguments are collected in an array so that every path reaches
# kallisto as a single word, even when it contains whitespace.
index_args=()
if [[ -n "$par_pseudo_aligner_kmer_size" ]]; then
  index_args+=(-k "$par_pseudo_aligner_kmer_size")
fi

kallisto index \
  "${index_args[@]}" \
  -i "$par_kallisto_index" \
  "$par_transcriptome_fasta"
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
#!/bin/bash

# Integration test for the kallisto_index component: builds an index from the
# test transcriptome and checks that a non-empty index file was written.

index_file="Kallisto"

echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --transcriptome_fasta "$meta_resources_dir/transcriptome.fasta" \
  --kallisto_index "$index_file"

echo ">>> Checking whether output exists"
if [ ! -f "$index_file" ]; then
  echo "Kallisto index does not exist!"
  exit 1
fi
if [ ! -s "$index_file" ]; then
  echo "Kallisto index is empty!"
  exit 1
fi

echo "All tests succeeded!"
exit 0
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
name: kallisto_quant
|
|
||||||
namespace: kallisto
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/kallisto/quant/main.nf, modules/nf-core/kallisto/quant/meta.yml]
|
|
||||||
last_sha: aff1d2e02717247831644769fc3ba84868c3fdde
|
|
||||||
description: |
|
|
||||||
Computes equivalence classes for reads and quantifies abundances.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ","
|
|
||||||
description: List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
description: Paired reads or not.
|
|
||||||
- name: "--strandedness"
|
|
||||||
type: string
|
|
||||||
description: Sample strand-specificity.
|
|
||||||
- name: "--index"
|
|
||||||
type: file
|
|
||||||
description: Kallisto genome index.
|
|
||||||
- name: "--gtf"
|
|
||||||
type: file
|
|
||||||
description: Optional gtf file for translation of transcripts into genomic coordinates.
|
|
||||||
- name: "--chromosomes"
|
|
||||||
type: file
|
|
||||||
description: Optional tab separated file with chromosome names and lengths.
|
|
||||||
- name: "--fragment_length"
|
|
||||||
type: integer
|
|
||||||
description: For single-end mode only, the estimated average fragment length.
|
|
||||||
- name: "--fragment_length_sd"
|
|
||||||
type: integer
|
|
||||||
description: For single-end mode only, the estimated standard deviation of the fragment length.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output"
|
|
||||||
type: file
|
|
||||||
description: Kallisto quant results
|
|
||||||
default: "$id.kallisto_quant_results"
|
|
||||||
direction: output
|
|
||||||
- name: "--log"
|
|
||||||
type: file
|
|
||||||
description: File containing log information from running kallisto quant
|
|
||||||
default: "$id.kallisto_quant.log.txt"
|
|
||||||
direction: output
|
|
||||||
- name: "--run_info"
|
|
||||||
type: file
|
|
||||||
description: A json file containing information about the run
|
|
||||||
default: "$id.run_info.json"
|
|
||||||
direction: output
|
|
||||||
- name: "--quant_results_file"
|
|
||||||
type: file
|
|
||||||
description: TSV file containing abundance estimates from Kallisto
|
|
||||||
direction: output
|
|
||||||
default: $id.abundance.tsv
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/reference/transcriptome.fasta
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: docker
|
|
||||||
run: |
|
|
||||||
apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends wget && \
|
|
||||||
wget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
|
|
||||||
tar -xzf kallisto_linux-v0.50.1.tar.gz && \
|
|
||||||
mv kallisto/kallisto /usr/local/bin/
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,40 +0,0 @@
|
|||||||
#!/bin/bash

# kallisto quant wrapper script.
#
# Quantifies the reads in $par_input (comma-separated, one or two files)
# against $par_index and writes the results to the directory $par_output.
# Afterwards the captured stderr log is moved to $par_log, run_info.json is
# moved to $par_run_info and abundance.tsv is copied to
# $par_quant_results_file.

set -eo pipefail

IFS="," read -ra input <<< "$par_input"

# Single-end data needs an explicit fragment length and standard deviation.
# The original test compared strings with "<" and read the undefined
# $fragment_length_sd, so a missing standard deviation was never detected.
single_end_args=()
if [[ "$par_paired" == "false" ]]; then
  if [[ -z "$par_fragment_length" || -z "$par_fragment_length_sd" ]]; then
    echo "fragment_length and fragment_length_sd must be set for single-end data" >&2
    exit 1
  fi
  if [[ "$par_fragment_length" -lt 0 || "$par_fragment_length_sd" -lt 0 ]]; then
    echo "fragment_length and fragment_length_sd must not be negative" >&2
    exit 1
  fi
  single_end_args=(
    --single
    --fragment-length "$par_fragment_length"
    --sd "$par_fragment_length_sd"
  )
fi

# $par_extra_args is a whitespace-separated option string; split it into
# words deliberately (read does this without glob expansion).
read -ra extra_args <<< "$par_extra_args"

# Derive the strandedness flag from $par_strandedness unless the caller
# already passed one through $par_extra_args.
strand_args=()
if [[ "$par_extra_args" != *"--fr-stranded"* ]] && [[ "$par_extra_args" != *"--rf-stranded"* ]]; then
  if [[ "$par_strandedness" == 'forward' ]]; then
    strand_args=(--fr-stranded)
  elif [[ "$par_strandedness" == 'reverse' ]]; then
    strand_args=(--rf-stranded)
  fi
fi

# Optional arguments are only added when the matching variable is non-empty.
quant_args=()
if [[ -n "$meta_cpus" ]]; then
  quant_args+=(--threads "$meta_cpus")
fi
quant_args+=(--index "$par_index")
if [[ -n "$par_gtf" ]]; then
  quant_args+=(--gtf "$par_gtf")
fi
if [[ -n "$par_chromosomes" ]]; then
  quant_args+=(--chromosomes "$par_chromosomes")
fi

mkdir -p "$par_output"

# Send kallisto's stderr through tee so it lands both in the log file and on
# stderr, while stdout bypasses the pipe via fd 3. A plain pipeline is used
# instead of "2> >(tee ...)" because the shell waits for every stage of a
# pipeline, so the log is complete before it is moved below.
{
  kallisto quant \
    "${quant_args[@]}" \
    "${single_end_args[@]}" \
    "${strand_args[@]}" \
    "${extra_args[@]}" \
    -o "$par_output" \
    "${input[@]}" \
    2>&1 1>&3 | tee -a "${par_output}/kallisto_quant.log" >&2
} 3>&1

mv -- "${par_output}/kallisto_quant.log" "${par_log}"
mv -- "${par_output}/run_info.json" "${par_run_info}"
cp -- "${par_output}/abundance.tsv" "${par_quant_results_file}"
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash

# Integration test for the kallisto_quant component: builds an index with
# kallisto directly, then quantifies paired-end and single-end reads and
# checks the expected output files.

# Abort the test run unless "$1" is a regular file with non-zero size.
# "$2" is the name used in the failure message.
assert_populated_file() {
  local target="$1" label="$2"
  if [ ! -f "$target" ]; then
    echo "$label does not exist!"
    exit 1
  fi
  if [ ! -s "$target" ]; then
    echo "$label is empty!"
    exit 1
  fi
}

# Check the outputs of one run: results directory "$1", log file "$2" and
# run-info file "$3".
assert_quant_outputs() {
  local results_dir="$1" log_file="$2" run_info="$3"
  if [ ! -d "$results_dir" ]; then
    echo "Kallisto results do not exist!"
    exit 1
  fi
  assert_populated_file "$log_file" "$log_file"
  assert_populated_file "$run_info" "$run_info"
  assert_populated_file "$results_dir/abundance.tsv" "abundance.tsv"
  assert_populated_file "$results_dir/abundance.h5" "abundance.h5"
}

# Test data is addressed through $meta_resources_dir, as in the other
# component tests, rather than relative to the working directory.
reads_1="$meta_resources_dir/SRR6357070_1.fastq.gz"
reads_2="$meta_resources_dir/SRR6357070_2.fastq.gz"

echo ">>> Testing $meta_functionality_name"

echo ">>> Generating Kallisto index"
kallisto index \
  -i index \
  "$meta_resources_dir/transcriptome.fasta"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --index index \
  --paired true \
  --strandedness reverse \
  --output paired_end_test \
  --input "$reads_1,$reads_2" \
  --log quant_pe.log \
  --run_info pe_run_info.json

echo ">>> Checking whether output exists"
assert_quant_outputs paired_end_test quant_pe.log pe_run_info.json

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --index index \
  --paired false \
  --strandedness "reverse" \
  --output single_end_test \
  --input "$reads_1" \
  --log quant_se.log \
  --run_info se_run_info.json \
  --fragment_length 101 \
  --fragment_length_sd 50

echo ">>> Checking whether output exists"
assert_quant_outputs single_end_test quant_se.log se_run_info.json

echo "All tests succeeded!"
exit 0
|
|
||||||
@@ -1,118 +0,0 @@
|
|||||||
name: "qualimap"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/qualimap/rnaseq/main.nf]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
RNA-seq QC analysis using the qualimap
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: path to input mapping file in BAM format.
|
|
||||||
|
|
||||||
- name: "--gtf"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: path to annotations file in Ensembl GTF format.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output_dir"
|
|
||||||
direction: output
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
default: $id.qualimap_output
|
|
||||||
description: path to output directory for raw data and report.
|
|
||||||
|
|
||||||
- name: "--output_pdf"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.report.pdf
|
|
||||||
description: path to output file for pdf report.
|
|
||||||
|
|
||||||
- name: "--output_format"
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
default: html
|
|
||||||
description: Format of the output report (PDF or HTML, default is HTML)
|
|
||||||
|
|
||||||
- name: "Optional"
|
|
||||||
arguments:
|
|
||||||
- name: "--pr_bases"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 100
|
|
||||||
min: 1
|
|
||||||
description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).
|
|
||||||
|
|
||||||
- name: "--tr_bias"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 1000
|
|
||||||
min: 1
|
|
||||||
description: Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).
|
|
||||||
|
|
||||||
- name: "--algorithm"
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
default: uniquely-mapped-reads
|
|
||||||
description: Counting algorithm (uniquely-mapped-reads (default) or proportional).
|
|
||||||
|
|
||||||
- name: "--sequencing_protocol"
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
choices: ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"]
|
|
||||||
default: non-strand-specific
|
|
||||||
description: Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).
|
|
||||||
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean_true
|
|
||||||
description: Setting this flag for paired-end experiments will result in counting fragments instead of reads.
|
|
||||||
|
|
||||||
- name: "--sorted"
|
|
||||||
type: boolean_true
|
|
||||||
description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis.
|
|
||||||
|
|
||||||
- name: "--java_memory_size"
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
default: 4G
|
|
||||||
description: maximum Java heap memory size, default = 4G.
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam
|
|
||||||
- path: /testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai
|
|
||||||
- path: /testData/unit_test_resources/genes.gtf
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [ r-base, unzip, wget, openjdk-8-jdk, libxml2-dev, libcurl4-openssl-dev ]
|
|
||||||
- type: docker
|
|
||||||
run: |
|
|
||||||
wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \
|
|
||||||
unzip qualimap_v2.3.zip && \
|
|
||||||
cp -a qualimap_v2.3/. usr/bin && \
|
|
||||||
unset DISPLAY && \
|
|
||||||
mkdir -p tmp && \
|
|
||||||
export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp
|
|
||||||
- type: r
|
|
||||||
# NOTE(review): the Bioconductor package is published as "NOISeq"; "NOISeqr" does not appear to exist - confirm.
bioc: [ NOISeq ]
|
|
||||||
cran: [ optparse ]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
#!/bin/bash

# Run `qualimap rnaseq` on an alignment file and write the results into
# $par_output_dir.
#
# Variables read (expected to be provided by the caller):
#   par_input, par_gtf                 alignment file and annotation file
#   par_output_dir, par_output_format  result directory and report format
#   par_java_memory_size, par_algorithm, par_pr_bases, par_tr_bias,
#   par_sequencing_protocol            forwarded unchanged to qualimap
#   par_paired, par_sorted             flags; add -pe / -s when enabled

set -eo pipefail

# A flag that arrives as the literal string "false" is still non-empty, so
# ${var:+...} below would emit the option anyway. Drop such values first.
[[ "$par_paired" == "false" ]] && unset par_paired
[[ "$par_sorted" == "false" ]] && unset par_sorted

mkdir -p "$par_output_dir"

# The two ${...:+...} expansions are intentionally unquoted: they must vanish
# entirely when the flag is not set.
qualimap rnaseq \
  --java-mem-size="$par_java_memory_size" \
  --algorithm "$par_algorithm" \
  --num-pr-bases "$par_pr_bases" \
  --num-tr-bias "$par_tr_bias" \
  --sequencing-protocol "$par_sequencing_protocol" \
  -bam "$par_input" \
  -gtf "$par_gtf" \
  ${par_paired:+-pe} \
  ${par_sorted:+-s} \
  -outdir "$par_output_dir" \
  -outformat "$par_output_format"
|
|
||||||
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
# Test for the qualimap component: run the executable on the bundled BAM/GTF
# pair and verify that the output directory, the raw data folder and the HTML
# report were produced.
echo "> Running $meta_functionality_name."

# define input and output for script
input_bam="$meta_resources_dir/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
input_gtf="$meta_resources_dir/genes.gtf"
output_dir="qualimap_output"

"$meta_executable" \
  --input "$input_bam" \
  --gtf "$input_gtf" \
  --output_dir "$output_dir"

exit_code=$?
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1

echo ">> Checking whether output dir and files exists"

[ ! -d "$output_dir" ] && echo "Output dir could not be found!" && exit 1
[ ! -d "$output_dir/raw_data_qualimapReport" ] && echo "Raw data folder could not be found!" && exit 1
# The listing must be quoted: unquoted, several file names turn into several
# arguments and `[ -z a b ... ]` fails with "too many arguments".
[ -z "$(ls -A "$output_dir/raw_data_qualimapReport")" ] && echo "Raw data folder is missing output files" && exit 1
[ ! -f "$output_dir/qualimapReport.html" ] && echo "Qualimap report was not found" && exit 1
[ ! -s "$output_dir/qualimapReport.html" ] && echo "Qualimap report is empty" && exit 1

exit 0
|
|
||||||
@@ -1,135 +0,0 @@
|
|||||||
name: "rsem_calculate_expression"
|
|
||||||
namespace: "rsem"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/rsem/calculateexpression/main.nf, modules/nf-core/rsem/calculateexpression/meta.yml]
|
|
||||||
last_sha: 92b2a7857de1dda9d1c19a088941fc81e2976ff7
|
|
||||||
|
|
||||||
description: |
|
|
||||||
Calculate expression with RSEM.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--id"
|
|
||||||
type: string
|
|
||||||
description: Sample ID.
|
|
||||||
- name: "--strandedness"
|
|
||||||
type: string
|
|
||||||
description: Sample strand-specificity. Must be one of unstranded, forward, reverse
|
|
||||||
choices: [forward, reverse, unstranded]
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
description: Paired-end reads or not?
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
description: Input reads for quantification.
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ";"
|
|
||||||
- name: "--index"
|
|
||||||
type: file
|
|
||||||
description: RSEM index.
|
|
||||||
- name: "--extra_args"
|
|
||||||
type: string
|
|
||||||
description: Extra rsem-calculate-expression arguments in addition to the defaults.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--counts_gene"
|
|
||||||
type: file
|
|
||||||
description: Expression counts on gene level
|
|
||||||
example: sample.genes.results
|
|
||||||
direction: output
|
|
||||||
- name: "--counts_transcripts"
|
|
||||||
type: file
|
|
||||||
description: Expression counts on transcript level
|
|
||||||
example: sample.isoforms.results
|
|
||||||
direction: output
|
|
||||||
- name: "--stat"
|
|
||||||
type: file
|
|
||||||
description: RSEM statistics
|
|
||||||
example: sample.stat
|
|
||||||
direction: output
|
|
||||||
- name: "--logs"
|
|
||||||
type: file
|
|
||||||
description: RSEM logs
|
|
||||||
example: sample.log
|
|
||||||
direction: output
|
|
||||||
- name: "--bam_star"
|
|
||||||
type: file
|
|
||||||
description: BAM file generated by STAR (optional)
|
|
||||||
example: sample.STAR.genome.bam
|
|
||||||
direction: output
|
|
||||||
- name: "--bam_genome"
|
|
||||||
type: file
|
|
||||||
description: Genome BAM file (optional)
|
|
||||||
example: sample.genome.bam
|
|
||||||
direction: output
|
|
||||||
- name: "--bam_transcript"
|
|
||||||
type: file
|
|
||||||
description: Transcript BAM file (optional)
|
|
||||||
example: sample.transcript.bam
|
|
||||||
direction: output
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz
|
|
||||||
- path: /testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz
|
|
||||||
- path: /testData/minimal_test/reference/rsem.tar.gz
|
|
||||||
|
|
||||||
# TODO: Install bowtie/bowtie2
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages:
|
|
||||||
- build-essential
|
|
||||||
- gcc
|
|
||||||
- g++
|
|
||||||
- make
|
|
||||||
- wget
|
|
||||||
- zlib1g-dev
|
|
||||||
- unzip
|
|
||||||
- xxd
|
|
||||||
- perl
|
|
||||||
- r-base
|
|
||||||
- bowtie2
|
|
||||||
- python3-pip
|
|
||||||
- git
|
|
||||||
- type: docker
|
|
||||||
env:
|
|
||||||
- STAR_VERSION=2.7.11b
|
|
||||||
- RSEM_VERSION=1.3.3
|
|
||||||
- TZ=Europe/Brussels
|
|
||||||
run: |
|
|
||||||
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \
|
|
||||||
cd /tmp && \
|
|
||||||
wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \
|
|
||||||
unzip ${STAR_VERSION}.zip && \
|
|
||||||
cd STAR-${STAR_VERSION}/source && \
|
|
||||||
make STARstatic CXXFLAGS_SIMD=-std=c++11 && \
|
|
||||||
cp STAR /usr/local/bin && \
|
|
||||||
cd /tmp && \
|
|
||||||
wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \
|
|
||||||
unzip v${RSEM_VERSION}.zip && \
|
|
||||||
cd RSEM-${RSEM_VERSION} && \
|
|
||||||
make && \
|
|
||||||
make install && \
|
|
||||||
rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \
|
|
||||||
rm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \
|
|
||||||
cd && \
|
|
||||||
apt-get clean && \
|
|
||||||
echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \
|
|
||||||
echo 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \
|
|
||||||
/bin/bash -c "source /etc/profile && source ~/.bashrc && echo $PATH && which STAR"
|
|
||||||
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
#!/bin/bash

# Run rsem-calculate-expression for one sample and move the result files it
# leaves in the working directory (named after $par_id) to the requested
# output paths.
#
# Variables read:
#   par_id              sample name, used by RSEM as the output prefix
#   par_input           ';'-separated list of read files
#   par_index           directory searched for the RSEM reference (*.grp)
#   par_paired          "true"/"false"; adds --paired-end when true
#   par_strandedness    forward | reverse | anything else (no option passed)
#   par_extra_args      extra options, split on whitespace
#   par_counts_gene, par_counts_transcripts, par_stat, par_bam_star,
#   par_bam_genome, par_bam_transcript   optional output destinations
#   meta_cpus, meta_temp_dir, meta_functionality_name

set -eo pipefail

function clean_up {
  rm -rf "$tmpdir"
}
trap clean_up EXIT

tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

[[ "$par_paired" == "false" ]] && unset par_paired

# Only forward/reverse are forwarded; any other value (including an unset
# one) passes no --strandedness option at all.
strandedness_args=()
case "$par_strandedness" in
  forward | reverse) strandedness_args=(--strandedness "$par_strandedness") ;;
esac

IFS=";" read -ra input <<< "$par_input"

# The reference name is the path of the *.grp file without its extension.
INDEX=$(find -L "$par_index/" -name "*.grp" | sed 's/\.grp$//')
if [[ -z "$INDEX" ]]; then
  echo "No RSEM reference (*.grp) found in '$par_index'" >&2
  exit 1
fi

# $par_extra_args is intentionally unquoted so that it splits into options.
rsem-calculate-expression \
  ${meta_cpus:+--num-threads "$meta_cpus"} \
  "${strandedness_args[@]}" \
  ${par_paired:+--paired-end} \
  $par_extra_args \
  "${input[@]}" \
  "$INDEX" \
  "$par_id"

# Move $1 to $2 if $1 exists, a destination was requested, and the two are
# not already the same file. Written with `if` so a missing optional file
# never becomes the exit status of the script.
function move_output {
  local src="$1" dest="$2"
  if [[ -e "$src" && -n "$dest" && ! "$src" -ef "$dest" ]]; then
    mv "$src" "$dest"
  fi
}

move_output "${par_id}.genes.results" "$par_counts_gene"
move_output "${par_id}.isoforms.results" "$par_counts_transcripts"
move_output "${par_id}.stat" "$par_stat"
# NOTE(review): moving "${par_id}.log" to $par_logs was disabled in the
# original script and is left disabled here - confirm whether it is needed.
# move_output "${par_id}.log" "$par_logs"
move_output "${par_id}.STAR.genome.bam" "$par_bam_star"
move_output "${par_id}.genome.bam" "$par_bam_genome"
move_output "${par_id}.transcript.bam" "$par_bam_transcript"
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
#!/bin/bash

# Test for rsem_calculate_expression: unpack the bundled RSEM reference, run
# the executable on one paired-end sample and check that the gene-level
# counts and the log file exist and are non-empty.

echo ">>> Testing $meta_functionality_name"

tar -xavf "$meta_resources_dir/rsem.tar.gz"

echo ">>> Calculating expression"
"$meta_executable" \
  --id WT_REP1 \
  --strandedness reverse \
  --paired true \
  --input "$meta_resources_dir/SRR6357070_1.fastq.gz;$meta_resources_dir/SRR6357070_2.fastq.gz" \
  --index rsem \
  --extra_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
  --counts_gene WT_REP1.genes.results \
  --counts_transcripts WT_REP1.isoforms.results \
  --logs WT_REP1.log

echo ">>> Checking whether output exists"
[ ! -f "WT_REP1.genes.results" ] && echo "Gene level expression counts file does not exist!" && exit 1
[ ! -s "WT_REP1.genes.results" ] && echo "Gene level expression counts file is empty!" && exit 1
[ ! -f "WT_REP1.log" ] && echo "Log file does not exist!" && exit 1
[ ! -s "WT_REP1.log" ] && echo "Log file is empty!" && exit 1

echo "All tests succeeded!"
exit 0
|
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
name: "rsem_merge_counts"
|
name: "rsem_merge_counts"
|
||||||
namespace: "rsem"
|
|
||||||
info:
|
info:
|
||||||
migration_info:
|
migration_info:
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
git_repo: https://github.com/nf-core/rnaseq.git
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
name: "rseqc_bamstat"
|
|
||||||
namespace: "rseqc"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/rseqc/bamstat/main.nf]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
Generate statistics from a bam file.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: input alignment file in BAM or SAM format
|
|
||||||
|
|
||||||
- name: "--map_qual"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 30
|
|
||||||
description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
|
|
||||||
min: 0
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
default: $id.mapping_quality.txt
|
|
||||||
description: output file (txt) with mapping quality statistics
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [ python3-pip ]
|
|
||||||
- type: python
|
|
||||||
packages: [ RSeQC ]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash

# Run bam_stat.py on $par_input with mapping-quality cutoff $par_map_qual and
# capture its standard output in $par_output.

set -eo pipefail

# Paths are quoted so that file names containing whitespace stay one argument.
bam_stat.py \
  --input "$par_input" \
  --mapq "$par_map_qual" \
  > "$par_output"
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
#!/bin/bash

# Test for rseqc_bamstat: run the executable on the bundled BAM file and make
# sure the statistics file is written and has content.

bam_name="test.paired_end.sorted.bam"
summary_file="mapping_quality.txt"

echo "> Running $meta_functionality_name."

"$meta_executable" \
  --input "$meta_resources_dir/$bam_name" \
  --output "$summary_file"
status=$?

# Abort as soon as the executable itself reports a failure.
if [[ $status != 0 ]]; then
  echo "Non zero exit code: $status" && exit 1
fi

echo ">> Checking whether output can be found and has content"

if [ ! -f "$summary_file" ]; then
  echo "$summary_file file missing" && exit 1
fi
if [ ! -s "$summary_file" ]; then
  echo "$summary_file file is empty" && exit 1
fi

exit 0
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
name: "rseqc_inferexperiment"
|
|
||||||
namespace: "rseqc"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/rseqc/inferexperiment/main.nf]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
Infer strandedness from sequencing reads
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: input alignment file in BAM or SAM format
|
|
||||||
|
|
||||||
- name: "--refgene"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: Reference gene model in bed format
|
|
||||||
|
|
||||||
- name: "--sample_size"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 200000
|
|
||||||
min: 1
|
|
||||||
description: Number of reads sampled from SAM/BAM file, default = 200000.
|
|
||||||
|
|
||||||
- name: "--map_qual"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 30
|
|
||||||
description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
|
|
||||||
min: 0
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
default: $id.strandedness.txt
|
|
||||||
description: output file (txt) of strandedness report
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam
|
|
||||||
- path: /testData/unit_test_resources/sarscov2/test.bed12
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [ python3-pip ]
|
|
||||||
- type: python
|
|
||||||
packages: [ RSeQC ]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
#!/bin/bash

# Run infer_experiment.py on $par_input against the gene model $par_refgene
# and capture its standard output in $par_output.
#   par_sample_size  value passed to -s
#   par_map_qual     value passed to -q

set -eo pipefail

# Paths are quoted so that file names containing whitespace stay one argument.
infer_experiment.py \
  -i "$par_input" \
  -r "$par_refgene" \
  -s "$par_sample_size" \
  -q "$par_map_qual" \
  > "$par_output"
|
|
||||||
@@ -1,24 +0,0 @@
|
|||||||
#!/bin/bash

# Test for rseqc_inferexperiment: run the executable on the bundled BAM and
# BED12 files and make sure the report is written and has content.

bam_path="$meta_resources_dir/test.paired_end.sorted.bam"
bed_path="$meta_resources_dir/test.bed12"
report="strandedness.txt"

echo "> Running $meta_functionality_name."

"$meta_executable" \
  --input "$bam_path" \
  --refgene "$bed_path" \
  --output "$report"
status=$?

# Abort as soon as the executable itself reports a failure.
if [[ $status != 0 ]]; then
  echo "Non zero exit code: $status" && exit 1
fi

echo ">> Checking whether output can be found and has content"

if [ ! -f "$report" ]; then
  echo "$report is missing" && exit 1
fi
if [ ! -s "$report" ]; then
  echo "$report is empty" && exit 1
fi

exit 0
|
|
||||||
@@ -1,117 +0,0 @@
|
|||||||
name: "rseqc_innerdistance"
|
|
||||||
namespace: "rseqc"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/rseqc/innerdistance/main.nf]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
Calculate inner distance between read pairs.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: input alignment file in BAM or SAM format
|
|
||||||
|
|
||||||
- name: "--refgene"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: Reference gene model in bed format
|
|
||||||
|
|
||||||
- name: "--sample_size"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 200000
|
|
||||||
min: 1
|
|
||||||
description: Number of reads sampled from SAM/BAM file, default = 200000.
|
|
||||||
|
|
||||||
- name: "--map_qual"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 30
|
|
||||||
description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30.
|
|
||||||
min: 0
|
|
||||||
|
|
||||||
- name: "--lower_bound_size"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: -250
|
|
||||||
description: Lower bound of inner distance (bp). This option is used for plotting the histogram, default=-250.
|
|
||||||
|
|
||||||
- name: "--upper_bound_size"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 250
|
|
||||||
description: Upper bound of inner distance (bp). This option is used for plotting the histogram, default=250.
|
|
||||||
|
|
||||||
- name: "--step_size"
|
|
||||||
type: integer
|
|
||||||
required: false
|
|
||||||
default: 5
|
|
||||||
description: Step size (bp) of the histogram. This option is used for plotting the histogram, default=5.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output_stats"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.inner_distance.stats
|
|
||||||
description: output file (txt) with summary statistics of inner distances of paired reads
|
|
||||||
|
|
||||||
- name: "--output_dist"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.inner_distance.txt
|
|
||||||
description: output file (txt) with inner distances of all paired reads
|
|
||||||
|
|
||||||
- name: "--output_freq"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.inner_distance_freq.txt
|
|
||||||
description: output file (txt) with frequencies of inner distances of all paired reads
|
|
||||||
|
|
||||||
- name: "--output_plot"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.inner_distance_plot.pdf
|
|
||||||
description: output file (pdf) with histogram plot of inner distances of all paired reads
|
|
||||||
|
|
||||||
- name: "--output_plot_r"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
default: $id.inner_distance_plot.r
|
|
||||||
description: output file (R) with script of histogram plot of inner distances of all paired reads
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/sarscov2/test.paired_end.sorted.bam
|
|
||||||
- path: /testData/unit_test_resources/sarscov2/test.bed12
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [python3-pip, r-base]
|
|
||||||
- type: python
|
|
||||||
packages: [ RSeQC ]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,23 +0,0 @@
|
|||||||
#!/bin/bash

# Run inner_distance.py under a random output prefix, keep the first two
# lines of its standard output as the statistics file, and move the files it
# wrote under that prefix to the requested output paths.
#
# Variables read:
#   par_input, par_refgene                     alignment file and gene model
#   par_sample_size, par_map_qual              values passed to -k / -q
#   par_lower_bound_size, par_upper_bound_size,
#   par_step_size                              values passed to -l / -u / -s
#   par_output_stats, par_output_dist, par_output_plot,
#   par_output_plot_r, par_output_freq         output destinations

set -exo pipefail

# Random prefix for the files inner_distance.py creates in the working dir.
prefix=$(openssl rand -hex 8)

inner_distance.py \
  -i "$par_input" \
  -r "$par_refgene" \
  -o "$prefix" \
  -k "$par_sample_size" \
  -l "$par_lower_bound_size" \
  -u "$par_upper_bound_size" \
  -s "$par_step_size" \
  -q "$par_map_qual" \
  > stdout.txt

head -n 2 stdout.txt > "$par_output_stats"

# Move $1 to $2 if $1 is a regular file and a destination was requested.
# Written with `if` so a missing optional file never becomes the exit status
# of the script.
function move_output {
  local src="$1" dest="$2"
  if [[ -f "$src" && -n "$dest" ]]; then
    mv "$src" "$dest"
  fi
}

move_output "$prefix.inner_distance.txt" "$par_output_dist"
move_output "$prefix.inner_distance_plot.pdf" "$par_output_plot"
move_output "$prefix.inner_distance_plot.r" "$par_output_plot_r"
move_output "$prefix.inner_distance_freq.txt" "$par_output_freq"
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
#!/bin/bash

# Test for rseqc_innerdistance: run the executable on the bundled paired-end
# BAM and BED12 files and check that all five outputs exist and are non-empty.
#
# The former `gunzip "$meta_resources_dir/hg19_RefSeq.bed.gz"` step was
# dropped: nothing in this script uses that file.

# define input and output for script
input_bam="$meta_resources_dir/test.paired_end.sorted.bam"
input_bed="$meta_resources_dir/test.bed12"

output_stats="inner_distance_stats.txt"
output_dist="inner_distance.txt"
output_plot="inner_distance_plot.pdf"
output_plot_r="inner_distance_plot.r"
output_freq="inner_distance_freq.txt"

# Run executable
echo "> Running $meta_functionality_name"

"$meta_executable" \
  --input "$input_bam" \
  --refgene "$input_bed" \
  --output_stats "$output_stats" \
  --output_dist "$output_dist" \
  --output_plot "$output_plot" \
  --output_plot_r "$output_plot_r" \
  --output_freq "$output_freq"

exit_code=$?
[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1

echo ">> asserting output has been created for paired read input"

[ ! -f "$output_stats" ] && echo "$output_stats was not created" && exit 1
[ ! -s "$output_stats" ] && echo "$output_stats is empty" && exit 1
[ ! -f "$output_dist" ] && echo "$output_dist was not created" && exit 1
[ ! -s "$output_dist" ] && echo "$output_dist is empty" && exit 1
[ ! -f "$output_plot" ] && echo "$output_plot was not created" && exit 1
[ ! -s "$output_plot" ] && echo "$output_plot is empty" && exit 1
[ ! -f "$output_plot_r" ] && echo "$output_plot_r was not created" && exit 1
[ ! -s "$output_plot_r" ] && echo "$output_plot_r is empty" && exit 1
[ ! -f "$output_freq" ] && echo "$output_freq was not created" && exit 1
[ ! -s "$output_freq" ] && echo "$output_freq is empty" && exit 1

exit 0
|
|
||||||
@@ -62,4 +62,4 @@ engines:
|
|||||||
image: quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0
|
image: quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0
|
||||||
runners:
|
runners:
|
||||||
- type: executable
|
- type: executable
|
||||||
- type: nextflow
|
- type: nextflow
|
||||||
|
|||||||
@@ -39,3 +39,4 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
mv rRNA_reads.log $par_sortmerna_log
|
mv rRNA_reads.log $par_sortmerna_log
|
||||||
|
|
||||||
|
|||||||
@@ -38,3 +38,4 @@ echo ">> Checking if the correct files are present"
|
|||||||
|
|
||||||
echo ">>> Test finished successfully"
|
echo ">>> Test finished successfully"
|
||||||
exit 0
|
exit 0
|
||||||
|
|
||||||
|
|||||||
@@ -1,309 +0,0 @@
|
|||||||
name: trimgalore
|
|
||||||
description: |
|
|
||||||
A wrapper tool around Cutadapt and FastQC to consistently apply quality and adapter trimming to FastQ files.
|
|
||||||
keywords: ["trimming", "adapters"]
|
|
||||||
links:
|
|
||||||
homepage: https://github.com/FelixKrueger/TrimGalore
|
|
||||||
documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
|
|
||||||
repository: https://github.com/FelixKrueger/TrimGalore
|
|
||||||
license: GPL-3.0
|
|
||||||
requirements:
|
|
||||||
commands: [trim_galore]
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: Input
|
|
||||||
arguments:
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
description: Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz
|
|
||||||
required: true
|
|
||||||
multiple: true
|
|
||||||
example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq
|
|
||||||
- name: Trimming options
|
|
||||||
arguments:
|
|
||||||
- name: --quality
|
|
||||||
alternatives: -q
|
|
||||||
type: integer
|
|
||||||
description: Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal).
|
|
||||||
example: 20
|
|
||||||
required: false
|
|
||||||
- name: --phred33
|
|
||||||
type: boolean
|
|
||||||
description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.
|
|
||||||
required: false
|
|
||||||
- name: --phred64
|
|
||||||
type: boolean
|
|
||||||
description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.
|
|
||||||
required: false
|
|
||||||
- name: --fastqc
|
|
||||||
type: boolean
|
|
||||||
description: Run FastQC in the default mode on the FastQ file once trimming is complete.
|
|
||||||
required: false
|
|
||||||
- name: --fastqc_args
|
|
||||||
type: string
|
|
||||||
description: Passes extra arguments to FastQC. If more than one argument is to be passed to FastQC they must be in the form "arg1 arg2 ...". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.
|
|
||||||
required: false
|
|
||||||
example: "--nogroup --outdir /home/"
|
|
||||||
# Adapter sequence for read 1 (or single-end reads). The value is a string,
# so the only example given is an adapter sequence.
- name: --adapter
  alternatives: -a
  type: string
  description: |
    Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA.
    At a special request, multiple adapters can also be specified like so:
    -a " AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT" -a2 " AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT",
    or so:
    -a "file:../multiple_adapters.fa" -a2 "file:../different_adapters.fa"
    Potentially in conjunction with the parameter "-n 3" to trim all adapters.
  required: false
  example: AGCTCCCG
|
|
||||||
- name: --adapter2
|
|
||||||
alternatives: -a2
|
|
||||||
type: string
|
|
||||||
description: Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires '--paired' to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA.
|
|
||||||
required: false
|
|
||||||
example: AGCTCCCG
|
|
||||||
- name: --illumina
|
|
||||||
type: boolean
|
|
||||||
description: Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.
|
|
||||||
required: false
|
|
||||||
- name: --stranded_illumina
|
|
||||||
type: boolean
|
|
||||||
description: Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
|
|
||||||
required: false
|
|
||||||
- name: --nextera
|
|
||||||
type: boolean
|
|
||||||
description: Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
|
|
||||||
required: false
|
|
||||||
- name: --small_rna
|
|
||||||
type: boolean
|
|
||||||
description: Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then -a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT) unless -a2 had been defined explicitly.
|
|
||||||
- name: --consider_already_trimmed
|
|
||||||
type: integer
|
|
||||||
description: During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to '-a X'). Quality trimming is still performed as usual.
|
|
||||||
required: false
|
|
||||||
- name: --max_length
|
|
||||||
type: integer
|
|
||||||
description: Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences.
|
|
||||||
required: false
|
|
||||||
- name: --stringency
|
|
||||||
type: integer
|
|
||||||
description: Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3' end of any read.
|
|
||||||
required: false
|
|
||||||
example: 1
|
|
||||||
- name: --error_rate
|
|
||||||
alternatives: -e
|
|
||||||
type: double
|
|
||||||
description: Maximum allowed error rate (no. of errors divided by the length of the matching region)
|
|
||||||
required: false
|
|
||||||
example: 0.1
|
|
||||||
- name: --gzip
|
|
||||||
type: boolean
|
|
||||||
description: Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly.
|
|
||||||
required: false
|
|
||||||
- name: --dont_gzip
|
|
||||||
type: boolean
|
|
||||||
description: Output files won't be compressed with GZIP. This option overrides --gzip.
|
|
||||||
required: false
|
|
||||||
- name: --length
|
|
||||||
type: integer
|
|
||||||
description: Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of '0' effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option.
|
|
||||||
required: false
|
|
||||||
example: 20
|
|
||||||
- name: --max_n
|
|
||||||
type: integer
|
|
||||||
description: The total number of Ns a read may contain before it will be removed altogether. In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length.
|
|
||||||
required: false
|
|
||||||
- name: --trim_n
|
|
||||||
type: boolean
|
|
||||||
description: Removes Ns from either side of the read. This option does currently not work in RRBS mode.
|
|
||||||
required: false
|
|
||||||
- name: --no_report_file
|
|
||||||
type: boolean
|
|
||||||
description: If specified no report file will be generated.
|
|
||||||
required: false
|
|
||||||
- name: --suppress_warn
|
|
||||||
type: boolean
|
|
||||||
description: If specified any output to STDOUT or STDERR will be suppressed.
|
|
||||||
required: false
|
|
||||||
- name: --clip_R1
|
|
||||||
type: integer
|
|
||||||
description: Instructs TrimGalore to remove given number of bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.
|
|
||||||
required: false
|
|
||||||
- name: --clip_R2
|
|
||||||
type: integer
|
|
||||||
description: Instructs TrimGalore to remove given number bp from the 5' end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation.
|
|
||||||
required: false
|
|
||||||
- name: --three_prime_clip_R1
|
|
||||||
type: integer
|
|
||||||
description: Instructs Trim Galore to remove specified number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3' end that is not directly related to adapter sequence or basecall quality.
|
|
||||||
required: false
|
|
||||||
- name: --three_prime_clip_R2
|
|
||||||
type: integer
|
|
||||||
description: Instructs Trim Galore to remove <int> bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
|
|
||||||
required: false
|
|
||||||
- name: --nextseq
|
|
||||||
type: integer
|
|
||||||
description: This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exclusive with '-q INT'.
|
|
||||||
required: false
|
|
||||||
- name: --basename
|
|
||||||
type: string
|
|
||||||
description: Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
required: false
|
|
||||||
- name: --cores
|
|
||||||
alternatives: -j
|
|
||||||
type: integer
|
|
||||||
description: Number of cores to be used for trimming
|
|
||||||
required: false
|
|
||||||
example: 1
|
|
||||||
- name: Specific trimming options without adapter/quality trimming
|
|
||||||
arguments:
|
|
||||||
- name: --hardtrim5
|
|
||||||
type: integer
|
|
||||||
description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to <int> bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .<int>_5prime.fq(.gz).
|
|
||||||
required: false
|
|
||||||
- name: --hardtrim3
|
|
||||||
type: integer
|
|
||||||
description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to <int> bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in .<int>_3prime.fq(.gz).
|
|
||||||
required: false
|
|
||||||
- name: --clock
|
|
||||||
type: boolean
|
|
||||||
description: In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock.
|
|
||||||
required: false
|
|
||||||
- name: --polyA
|
|
||||||
type: boolean
|
|
||||||
description: This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start ("32:A:") and end ("_PolyA:32") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality trimmed before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming.
|
|
||||||
required: false
|
|
||||||
- name: --implicon
|
|
||||||
type: boolean
|
|
||||||
description: |
|
|
||||||
This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In its current implementation, the UMI carrying reads come in the following format
|
|
||||||
Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'
|
|
||||||
Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'
|
|
||||||
Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.
|
|
||||||
required: false
|
|
||||||
- name: RRBS-specific options
|
|
||||||
arguments:
|
|
||||||
- name: --rrbs
|
|
||||||
type: boolean
|
|
||||||
description: Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below).
|
|
||||||
required: false
|
|
||||||
- name: --non_directional
|
|
||||||
type: boolean
|
|
||||||
description: Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. Note that this option does not set '--clip_r2 2' in paired-end mode.
|
|
||||||
required: false
|
|
||||||
- name: --keep
|
|
||||||
type: boolean
|
|
||||||
description: Keep the quality trimmed intermediate file.
|
|
||||||
required: false
|
|
||||||
- name: Paired-end specific options
|
|
||||||
arguments:
|
|
||||||
- name: --paired
|
|
||||||
type: boolean
|
|
||||||
description: This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
|
|
||||||
required: false
|
|
||||||
- name: --retain_unpaired
|
|
||||||
type: boolean
|
|
||||||
description: If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2.
|
|
||||||
required: false
|
|
||||||
- name: --length_1
|
|
||||||
alternatives: -r1
|
|
||||||
type: integer
|
|
||||||
description: Unpaired single-end read length cutoff needed for read 1 to be written to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
|
|
||||||
example: 35
|
|
||||||
required: false
|
|
||||||
- name: --length_2
|
|
||||||
alternatives: -r2
|
|
||||||
type: integer
|
|
||||||
description: Unpaired single-end read length cutoff needed for read 2 to be written to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
|
|
||||||
required: false
|
|
||||||
example: 35
|
|
||||||
- name: Output
|
|
||||||
arguments:
|
|
||||||
- name: --output_dir
|
|
||||||
alternatives: -o
|
|
||||||
type: file
|
|
||||||
description: If specified all output will be written to this directory instead of the current directory.
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
default: trimmed_output
|
|
||||||
- name: --trimmed_r1
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: read_1.fastq.gz
|
|
||||||
- name: --trimmed_r2
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: read_2.fastq.gz
|
|
||||||
- name: --trimming_report_r1
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: read_1.trimming_report.txt
|
|
||||||
- name: --trimming_report_r2
|
|
||||||
type: file
|
|
||||||
description: Trimming report for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
example: read_2.trimming_report.txt
|
|
||||||
- name: --trimmed_fastqc_html_1
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: read_1.fastqc.html
|
|
||||||
- name: --trimmed_fastqc_html_2
|
|
||||||
type: file
|
|
||||||
description: FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
example: read_2.fastqc.html
|
|
||||||
- name: --trimmed_fastqc_zip_1
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: read_1.fastqc.zip
|
|
||||||
- name: --trimmed_fastqc_zip_2
|
|
||||||
type: file
|
|
||||||
description: FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
required: false
|
|
||||||
example: read_2.fastqc.zip
|
|
||||||
- name: --unpaired_r1
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Output file for unpaired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: unpaired_read_1.fastq
|
|
||||||
- name: --unpaired_r2
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
description: Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
|
|
||||||
direction: output
|
|
||||||
example: unpaired_read_2.fastq
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0
|
|
||||||
setup:
|
|
||||||
- type: docker
|
|
||||||
run: |
|
|
||||||
echo "TrimGalore: `trim_galore --version | sed -n 's/.*version\s\+\([0-9]\+\.[0-9]\+\.[0-9]\+\).*/\1/p'`" > /var/software_versions.txt
|
|
||||||
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,111 +0,0 @@
|
|||||||
#!/bin/bash

# Wrapper script around Trim Galore.
#
# Reads its settings from the `par_*` variables provided by the caller, runs
# `trim_galore` on the ';'-separated input files and afterwards moves the
# files Trim Galore wrote into "$par_output_dir" to the individually
# requested output locations.

set -eo pipefail

#######################################
# Move Trim Galore result file(s) to a requested output location.
# Arguments:
#   $1    - destination path; an empty value means the output was not
#           requested and nothing is moved
#   $2... - source path(s), normally a glob expanded by the caller
# Returns:
#   0 when nothing was requested or the move succeeded, mv's status otherwise
#######################################
move_output() {
  local dest=$1
  shift
  # A plain `[[ -n ... ]] && mv ...` evaluates to 1 for outputs that were not
  # requested; as the last statement of the script that would become the
  # script's exit status. An `if` keeps the status at 0 in that case.
  if [[ -n "$dest" ]]; then
    mv -- "$@" "$dest"
  fi
}

mkdir -p "$par_output_dir"

# Multiple input files arrive as one ';'-separated string.
IFS=";" read -ra input <<< "$par_input"

# Boolean parameters hold the strings "true"/"false". Unsetting the false
# ones lets `${par_x:+--flag}` below emit a flag only when it is enabled.
unset_if_false=(
  par_phred33
  par_phred64
  par_fastqc
  par_illumina
  par_stranded_illumina
  par_nextera
  par_small_rna
  par_gzip
  par_dont_gzip
  par_trim_n
  par_no_report_file
  par_suppress_warn
  par_clock
  par_polyA
  par_implicon
  par_rrbs
  par_non_directional
  par_keep
  par_paired
  par_retain_unpaired
)

for par in "${unset_if_false[@]}"; do
  if [[ "${!par}" == "false" ]]; then
    unset "$par"
  fi
done

# Boolean options are passed as bare flags; valued options carry their
# (quoted) value. --trim-n and --implicon are booleans and therefore must not
# be followed by the literal string "true".
trim_galore \
  ${par_quality:+-q "${par_quality}"} \
  ${par_phred33:+--phred33} \
  ${par_phred64:+--phred64} \
  ${par_fastqc:+--fastqc} \
  ${par_fastqc_args:+--fastqc_args "${par_fastqc_args}"} \
  ${par_adapter:+-a "${par_adapter}"} \
  ${par_adapter2:+-a2 "${par_adapter2}"} \
  ${par_illumina:+--illumina} \
  ${par_stranded_illumina:+--stranded_illumina} \
  ${par_nextera:+--nextera} \
  ${par_small_rna:+--small_rna} \
  ${par_consider_already_trimmed:+--consider_already_trimmed "${par_consider_already_trimmed}"} \
  ${par_max_length:+--max_length "${par_max_length}"} \
  ${par_stringency:+--stringency "${par_stringency}"} \
  ${par_error_rate:+-e "${par_error_rate}"} \
  ${par_gzip:+--gzip} \
  ${par_dont_gzip:+--dont_gzip} \
  ${par_length:+--length "${par_length}"} \
  ${par_max_n:+--max_n "${par_max_n}"} \
  ${par_trim_n:+--trim-n} \
  ${par_no_report_file:+--no_report_file} \
  ${par_suppress_warn:+--suppress_warn} \
  ${par_clip_R1:+--clip_R1 "${par_clip_R1}"} \
  ${par_clip_R2:+--clip_R2 "${par_clip_R2}"} \
  ${par_three_prime_clip_R1:+--three_prime_clip_R1 "${par_three_prime_clip_R1}"} \
  ${par_three_prime_clip_R2:+--three_prime_clip_R2 "${par_three_prime_clip_R2}"} \
  ${par_nextseq:+--nextseq "${par_nextseq}"} \
  ${par_basename:+--basename "${par_basename}"} \
  ${par_hardtrim5:+--hardtrim5 "${par_hardtrim5}"} \
  ${par_hardtrim3:+--hardtrim3 "${par_hardtrim3}"} \
  ${par_clock:+--clock} \
  ${par_polyA:+--polyA} \
  ${par_implicon:+--implicon} \
  ${par_rrbs:+--rrbs} \
  ${par_non_directional:+--non_directional} \
  ${par_keep:+--keep} \
  ${par_paired:+--paired} \
  ${par_retain_unpaired:+--retain_unpaired} \
  ${par_length_1:+-r1 "${par_length_1}"} \
  ${par_length_2:+-r2 "${par_length_2}"} \
  ${par_cores:+-j "${par_cores}"} \
  -o "$par_output_dir" \
  "${input[@]}"

# The trimming reports are looked up by the input file's base name.
input_r1=$(basename -- "${input[0]}")

# par_paired is unset when it was "false", hence the quoted comparison.
if [[ "$par_paired" == "true" ]]; then

  input_r2=$(basename -- "${input[1]}")
  move_output "$par_trimmed_r1" "$par_output_dir"/*val_1.f*q*
  move_output "$par_trimmed_r2" "$par_output_dir"/*val_2.f*q*
  move_output "$par_trimming_report_r1" "$par_output_dir/${input_r1}_trimming_report.txt"
  move_output "$par_trimming_report_r2" "$par_output_dir/${input_r2}_trimming_report.txt"

  if [[ "$par_fastqc" == "true" ]]; then
    move_output "$par_trimmed_fastqc_html_1" "$par_output_dir"/*val_1_fastqc.html
    move_output "$par_trimmed_fastqc_html_2" "$par_output_dir"/*val_2_fastqc.html
    move_output "$par_trimmed_fastqc_zip_1" "$par_output_dir"/*val_1_fastqc.zip
    move_output "$par_trimmed_fastqc_zip_2" "$par_output_dir"/*val_2_fastqc.zip
  fi

  if [[ "$par_retain_unpaired" == "true" ]]; then
    move_output "$par_unpaired_r1" "$par_output_dir"/*.unpaired_1.f*q*
    move_output "$par_unpaired_r2" "$par_output_dir"/*.unpaired_2.f*q*
  fi

else

  move_output "$par_trimmed_r1" "$par_output_dir"/*_trimmed.fq*
  move_output "$par_trimming_report_r1" "$par_output_dir/${input_r1}_trimming_report.txt"

  if [[ "$par_fastqc" == "true" ]]; then
    move_output "$par_trimmed_fastqc_html_1" "$par_output_dir"/*_trimmed_fastqc.html
    move_output "$par_trimmed_fastqc_zip_1" "$par_output_dir"/*_trimmed_fastqc.zip
  fi

fi
|
|
||||||
@@ -1,127 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -e
|
|
||||||
set -eo pipefail
|
|
||||||
|
|
||||||
# helper functions
#
# Every helper checks one expectation. When the expectation does not hold it
# prints a message and terminates the test script with exit status 1;
# otherwise it returns 0 so that it is safe to call under `set -e`.

# $1 - path that must be an existing regular file
assert_file_exists() {
  [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; }
}

# $1 - path that must not be an existing regular file
assert_file_doesnt_exist() {
  [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; }
}

# $1 - path that must be missing or have size zero
assert_file_empty() {
  [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; }
}

# $1 - path that must exist with a size greater than zero
assert_file_not_empty() {
  [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; }
}

# $1 - file to search, $2 - grep pattern that must match at least one line
assert_file_contains() {
  grep -q -- "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; }
}

# $1 - file to search, $2 - grep pattern that must not match any line
assert_file_not_contains() {
  # Written as an `if` on purpose: `grep ... && { ...; }` leaves grep's
  # non-zero status as the function's return value when the pattern is absent
  # (the success case), which aborts a caller running under `set -e`.
  if grep -q -- "$2" "$1"; then
    echo "File '$1' contains '$2' but shouldn't"
    exit 1
  fi
}
|
|
||||||
|
|
||||||
#################################################################
|
|
||||||
|
|
||||||
echo ">>> Prepare test data"
|
|
||||||
|
|
||||||
cat > example_R1.fastq <<'EOF'
|
|
||||||
@SRR6357071.22842410 22842410/1 kraken:taxid|4932
|
|
||||||
CAAGTTTTCATCTTCAACAGCTGATTGACTTCTTTGTGGTATGCCTCGATATATTTTTCTTTTTCTTTAATATCTTTATTATAGGTGATTGCCTCATCGTA
|
|
||||||
+
|
|
||||||
BBBBBFFFFFFFFFFFFFFF/BFFFFFFFFFFFFFFFFBFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFBF<
|
|
||||||
@SRR6357071.52260105 52260105/1 kraken:taxid|4932
|
|
||||||
TAGACTTACCAGTACCCTTTTCGACGGCGGAAACATTCAAAATACCGTTAGAGTCGACATCGAAAGTGACTTCAATTTGTGGGACACCTCTTGGAGCTGGT
|
|
||||||
+
|
|
||||||
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/FFFFFFFFFFFFFFFF
|
|
||||||
EOF
|
|
||||||
|
|
||||||
cat > example_R2.fastq <<'EOF'
|
|
||||||
@SRR6357071.22842410 22842410/2 kraken:taxid|4932
|
|
||||||
CCGAGATCGAAGAAACGAATTCACCTGATTGCAGCTGTAAAAGCAGTAAAATCAATCAAACCAATACGGACAACCTTACGATACGATGAGGCAATCACCTA
|
|
||||||
+
|
|
||||||
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
|
|
||||||
@SRR6357071.52260105 52260105/2 kraken:taxid|4932
|
|
||||||
GTTGATTCCAAGAAACTCTACCATTCCAACTAAGAAATCCGAAGTTTTCTCTACTTATGCTGACAACCAACCAGGTGTCTTGATTCAAGTCTTTGAAGGTG
|
|
||||||
+
|
|
||||||
BBBBBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
|
|
||||||
EOF
|
|
||||||
|
|
||||||
#################################################################
|
|
||||||
|
|
||||||
echo ">>> Testing for single-end reads"
|
|
||||||
"$meta_executable" \
|
|
||||||
--paired false \
|
|
||||||
--input "example_R1.fastq" \
|
|
||||||
--trimmed_fastqc_html_1 output_se_test/example.trimmed.html \
|
|
||||||
--trimmed_fastqc_zip_1 output_se_test/example.trimmed.zip \
|
|
||||||
--trimmed_r1 output_se_test/example.trimmed.fastq \
|
|
||||||
--trimming_report_r1 output_se_test/example.trimming_report.txt \
|
|
||||||
--fastqc true \
|
|
||||||
--output_dir output_se_test
|
|
||||||
|
|
||||||
echo ">> Checking output"
|
|
||||||
assert_file_exists "output_se_test/example.trimmed.html"
|
|
||||||
assert_file_exists "output_se_test/example.trimmed.zip"
|
|
||||||
assert_file_exists "output_se_test/example.trimmed.fastq"
|
|
||||||
assert_file_exists "output_se_test/example.trimming_report.txt"
|
|
||||||
|
|
||||||
echo ">> Check if output is empty"
|
|
||||||
assert_file_not_empty "output_se_test/example.trimmed.html"
|
|
||||||
assert_file_not_empty "output_se_test/example.trimmed.zip"
|
|
||||||
assert_file_not_empty "output_se_test/example.trimmed.fastq"
|
|
||||||
assert_file_not_empty "output_se_test/example.trimming_report.txt"
|
|
||||||
|
|
||||||
echo ">> Check contents"
|
|
||||||
assert_file_contains "output_se_test/example.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
|
|
||||||
assert_file_contains "output_se_test/example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff"
|
|
||||||
|
|
||||||
#################################################################
|
|
||||||
|
|
||||||
echo ">>> Testing for paired-end reads"
|
|
||||||
"$meta_executable" \
|
|
||||||
--paired true \
|
|
||||||
--input "example_R1.fastq;example_R2.fastq" \
|
|
||||||
--trimmed_fastqc_html_1 output_pe_test/example_R1.trimmed.html \
|
|
||||||
--trimmed_fastqc_html_2 output_pe_test/example_R2.trimmed.html \
|
|
||||||
--trimmed_fastqc_zip_1 output_pe_test/example_R1.trimmed.zip \
|
|
||||||
--trimmed_fastqc_zip_2 output_pe_test/example_R2.trimmed.zip \
|
|
||||||
--trimmed_r1 output_pe_test/example_R1.trimmed.fastq \
|
|
||||||
--trimmed_r2 output_pe_test/example_R2.trimmed.fastq \
|
|
||||||
--trimming_report_r1 output_pe_test/example_R1.trimming_report.txt \
|
|
||||||
--trimming_report_r2 output_pe_test/example_R2.trimming_report.txt \
|
|
||||||
--fastqc true \
|
|
||||||
--output_dir output_pe_test
|
|
||||||
|
|
||||||
echo ">> Checking output"
|
|
||||||
assert_file_exists "output_pe_test/example_R1.trimmed.html"
|
|
||||||
assert_file_exists "output_pe_test/example_R2.trimmed.html"
|
|
||||||
assert_file_exists "output_pe_test/example_R1.trimmed.zip"
|
|
||||||
assert_file_exists "output_pe_test/example_R2.trimmed.zip"
|
|
||||||
assert_file_exists "output_pe_test/example_R1.trimmed.fastq"
|
|
||||||
assert_file_exists "output_pe_test/example_R2.trimmed.fastq"
|
|
||||||
assert_file_exists "output_pe_test/example_R1.trimming_report.txt"
|
|
||||||
assert_file_exists "output_pe_test/example_R2.trimming_report.txt"
|
|
||||||
|
|
||||||
echo ">> Check if output is empty"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R1.trimmed.html"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R2.trimmed.html"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R1.trimmed.zip"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R2.trimmed.zip"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R1.trimmed.fastq"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R2.trimmed.fastq"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R1.trimming_report.txt"
|
|
||||||
assert_file_not_empty "output_pe_test/example_R2.trimming_report.txt"
|
|
||||||
|
|
||||||
echo ">> Check contents"
|
|
||||||
assert_file_contains "output_pe_test/example_R1.trimmed.fastq" "@SRR6357071.22842410 22842410/1"
|
|
||||||
assert_file_contains "output_pe_test/example_R2.trimmed.fastq" "@SRR6357071.22842410 22842410/2"
|
|
||||||
assert_file_contains "output_pe_test/example_R1.trimming_report.txt" "sequences processed in total"
|
|
||||||
assert_file_contains "output_pe_test/example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff"
|
|
||||||
|
|
||||||
#################################################################
|
|
||||||
|
|
||||||
echo ">>> Test finished successfully"
|
|
||||||
exit 0
|
|
||||||
@@ -137,5 +137,6 @@ if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene))
|
|||||||
done <- lapply(params, write_se_table)
|
done <- lapply(params, write_se_table)
|
||||||
|
|
||||||
# Output session information and citations
|
# Output session information and citations
|
||||||
citation("tximeta")
|
# Removed for now because the 'tximeta' package is not found sometimes
|
||||||
|
# citation("tximeta")
|
||||||
sessionInfo()
|
sessionInfo()
|
||||||
@@ -1,61 +0,0 @@
|
|||||||
name: "umitools_dedup"
|
|
||||||
namespace: "umitools"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/umitools/dedup/main.nf, modules/nf-core/umitools/dedup/meta.yml]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
default: false
|
|
||||||
description: Paired fastq files or not?
|
|
||||||
- name: "--bam"
|
|
||||||
type: file
|
|
||||||
description: Input BAM file
|
|
||||||
- name: "--bai"
|
|
||||||
type: file
|
|
||||||
description: BAM index
|
|
||||||
- name: "--get_output_stats"
|
|
||||||
type: boolean
|
|
||||||
description: Whether or not to generate output stats.
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output_bam"
|
|
||||||
type: file
|
|
||||||
description: Deduplicated BAM file
|
|
||||||
direction: output
|
|
||||||
default: $id.$key.bam
|
|
||||||
- name: "--output_stats"
|
|
||||||
type: file
|
|
||||||
description: Directory containing UMI-based deduplication statistics files
|
|
||||||
direction: output
|
|
||||||
default: $id.umi_dedup.stats
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/chr19.bam
|
|
||||||
- path: /testData/unit_test_resources/chr19.bam.bai
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [pip]
|
|
||||||
- type: python
|
|
||||||
packages: [umi_tools]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
#!/bin/bash

# Deduplicate the reads of a BAM file with `umi_tools dedup`.
#
# Variables read by this script:
#   par_paired            "true" to run in paired-end mode
#   par_get_output_stats  "true" to also write deduplication statistics
#   par_bam               input BAM file
#   par_output_bam        path of the deduplicated BAM file to write
#   par_output_stats      directory that receives the statistics files

set -eo pipefail

# Options are collected in an array so that paths containing whitespace
# survive intact instead of being re-split by the shell.
dedup_args=()

# Flags are compared against the literal "true". Executing the value as a
# command (`if $flag; then`) treats an unset or empty flag as success, because
# an empty command exits with status 0.
if [[ "$par_get_output_stats" == "true" ]]; then
  mkdir -p -- "$par_output_stats"
  # The trailing slash makes umi_tools use the directory as filename prefix.
  dedup_args+=(--output-stats "$par_output_stats/")
fi

if [[ "$par_paired" == "true" ]]; then
  dedup_args+=(--paired)
fi

# Fixed seed (together with PYTHONHASHSEED below) for reproducible output.
dedup_args+=(--random-seed=100)

if [[ "$par_paired" == "true" ]]; then
  dedup_args+=(--unpaired-reads=discard --chimeric-pairs=discard)
fi

PYTHONHASHSEED=0 umi_tools dedup \
  -I "$par_bam" \
  -S "$par_output_bam" \
  "${dedup_args[@]}"
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
#!/bin/bash

# Smoke test for the UMI deduplication component: runs the executable on
# chr19.bam and verifies that the deduplicated BAM file and the statistics
# directory were produced and are not empty.

# Stop immediately when the component itself fails, instead of only noticing
# the failure indirectly through missing output files.
set -eo pipefail

echo ">>> Testing $meta_functionality_name"

"$meta_executable" \
  --paired false \
  --bam "$meta_resources_dir/chr19.bam" \
  --bai "$meta_resources_dir/chr19.bam.bai" \
  --get_output_stats true \
  --output_bam chr19.deduped.bam \
  --output_stats chr19.umi_dedup.stats

echo ">>> Checking whether output exists"
if [ ! -f "chr19.deduped.bam" ]; then
  echo "File 'chr19.deduped.bam' does not exist!"
  exit 1
fi
if [ ! -s "chr19.deduped.bam" ]; then
  echo "File 'chr19.deduped.bam' is empty!"
  exit 1
fi
if [ ! -d "chr19.umi_dedup.stats" ]; then
  echo "Directory 'chr19.umi_dedup.stats' does not exist!"
  exit 1
fi
# `ls -A` prints nothing for a directory without entries.
if [ -z "$(ls -A 'chr19.umi_dedup.stats')" ]; then
  echo "Directory 'chr19.umi_dedup.stats' is empty!"
  exit 1
fi

echo "All tests succeeded!"
exit 0
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
name: "umitools_extract"
|
|
||||||
namespace: "umitools"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/nf-core/umitools/extract/main.nf, modules/nf-core/umitools/extract/meta.yml]
|
|
||||||
last_sha: 54721c6946daf6d602d7069dc127deef9cbe6b33
|
|
||||||
description: |
|
|
||||||
UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.
|
|
||||||
This component flexibly removes UMI sequences from FASTQ reads. UMIs are removed and appended to the read name.
|
|
||||||
This component extracts the UMI barcode from a read and adds it to the read name, leaving any sample barcode in place.
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--paired"
|
|
||||||
type: boolean
|
|
||||||
required: false
|
|
||||||
default: false
|
|
||||||
description: Paired fastq files or not?
|
|
||||||
- name: "--input"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ","
|
|
||||||
description: Input fastq files, either one or two (paired)
|
|
||||||
example: sample.fastq
|
|
||||||
- name: "--bc_pattern"
|
|
||||||
type: string
|
|
||||||
description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI."
|
|
||||||
multiple: true
|
|
||||||
multiple_sep: ","
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--fastq_1"
|
|
||||||
type: file
|
|
||||||
required: true
|
|
||||||
description: Output file for read 1.
|
|
||||||
direction: output
|
|
||||||
default: $id.$key.read_1.fastq
|
|
||||||
- name: "--fastq_2"
|
|
||||||
type: file
|
|
||||||
required: false
|
|
||||||
must_exist: false
|
|
||||||
description: Output file for read 2.
|
|
||||||
direction: output
|
|
||||||
default: $id.$key.read_2.fastq
|
|
||||||
|
|
||||||
- name: "Optional arguments"
|
|
||||||
arguments:
|
|
||||||
- name: "--umitools_extract_method"
|
|
||||||
type: "string"
|
|
||||||
description: How the UMI barcode pattern is interpreted, either as a 'string' pattern or as a 'regex'.
|
|
||||||
default: string
|
|
||||||
choices: [ string, regex ]
|
|
||||||
- name: "--umitools_umi_separator"
|
|
||||||
type: string
|
|
||||||
default: "_"
|
|
||||||
description: The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.
|
|
||||||
- name: "--umitools_grouping_method"
|
|
||||||
type: string
|
|
||||||
description: Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.
|
|
||||||
default: "directional"
|
|
||||||
choices: ["unique", "percentile", "cluster", "adjacency", "directional"]
|
|
||||||
- name: "--umi_discard_read"
|
|
||||||
type: integer
|
|
||||||
description: After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.
|
|
||||||
choices: [0, 1, 2]
|
|
||||||
default: 0
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
|
|
||||||
test_resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: test.sh
|
|
||||||
- path: /testData/unit_test_resources/scrb_seq_fastq.1.gz
|
|
||||||
- path: /testData/unit_test_resources/scrb_seq_fastq.2.gz
|
|
||||||
- path: /testData/unit_test_resources/slim.fastq.gz
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [pip]
|
|
||||||
- type: python
|
|
||||||
packages: [umi_tools]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
#!/bin/bash

# Extract UMIs from single-end or paired-end FASTQ input with
# `umi_tools extract` and copy the processed reads to the requested outputs.
#
# Variables read by this script:
#   par_paired                   "true" for paired-end input
#   par_input                    comma-separated list of one or two read files
#   par_bc_pattern               comma-separated list of one or two UMI patterns
#   par_umitools_extract_method  value passed to --extract-method
#   par_umitools_umi_separator   value passed to --umi-separator
#   par_umi_discard_read         1 or 2 to drop that read after extraction
#   par_fastq_1, par_fastq_2     output files
#   meta_temp_dir, meta_functionality_name  used to name the scratch directory

set -eo pipefail

tmpdir=""

function clean_up {
  # tmpdir is still empty when mktemp failed; there is nothing to remove then.
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap clean_up EXIT

tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")

IFS="," read -ra input <<< "$par_input"
IFS="," read -ra pattern <<< "$par_bc_pattern"

read_count="${#input[@]}"
pattern_count="${#pattern[@]}"

if [[ "$par_paired" == "true" ]]; then
  echo "Paired - Reads: $read_count bc_patterns: $pattern_count"
  if (( read_count != 2 || pattern_count != 2 )); then
    echo "Paired end input requires two read files and two UMI patterns" >&2
    exit 1
  fi

  # The scratch outputs reuse the input file names (including extension).
  read1="$(basename -- "${input[0]}")"
  read2="$(basename -- "${input[1]}")"

  umi_tools extract \
    -I "${input[0]}" --read2-in="${input[1]}" \
    -S "$tmpdir/$read1" \
    --read2-out="$tmpdir/$read2" \
    --extract-method "$par_umitools_extract_method" \
    --bc-pattern "${pattern[0]}" \
    --bc-pattern2 "${pattern[1]}" \
    --umi-separator "$par_umitools_umi_separator"

  # When one read is discarded, the surviving read becomes the single output
  # and is written to par_fastq_1; par_fastq_2 is not created.
  case "${par_umi_discard_read:-0}" in
    1)
      # discard read 1 -> keep read 2
      cp -- "$tmpdir/$read2" "$par_fastq_1"
      ;;
    2)
      # discard read 2 -> keep read 1
      cp -- "$tmpdir/$read1" "$par_fastq_1"
      ;;
    *)
      cp -- "$tmpdir/$read1" "$par_fastq_1"
      cp -- "$tmpdir/$read2" "$par_fastq_2"
      ;;
  esac
else
  echo "Not Paired - $read_count"
  if (( read_count != 1 || pattern_count != 1 )); then
    echo "Single end input requires one read file and one UMI pattern" >&2
    exit 1
  fi

  read1="$(basename -- "${input[0]}")"

  umi_tools extract \
    -I "${input[0]}" -S "$tmpdir/$read1" \
    --extract-method "$par_umitools_extract_method" \
    --bc-pattern "${pattern[0]}" \
    --umi-separator "$par_umitools_umi_separator"

  cp -- "$tmpdir/$read1" "$par_fastq_1"
fi
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
#!/bin/bash

# Tests for the UMI extraction component. Three scenarios are exercised:
#   1. paired-end input, both reads kept
#   2. paired-end input with --umi_discard_read 2 (only one output expected)
#   3. single-end input with a regex UMI pattern

# Stop immediately when the component itself fails, instead of only noticing
# the failure indirectly through missing output files.
set -eo pipefail

echo ">>> Testing $meta_functionality_name"

echo ">>> Testing for paired-end reads"
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz" \
  --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \
  --umitools_extract_method string \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 0 \
  --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz \
  --fastq_2 scrb_seq_fastq.2.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [[ ! -f scrb_seq_fastq.1.umi_extract.fastq.gz || ! -f scrb_seq_fastq.2.umi_extract.fastq.gz ]]; then
  echo "Reads file missing"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is empty"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.2.umi_extract.fastq.gz" ]; then
  echo "Read 2 file is empty"
  exit 1
fi

# Remove the outputs so the next scenario cannot pass on leftovers.
rm scrb_seq_fastq.1.umi_extract.fastq.gz scrb_seq_fastq.2.umi_extract.fastq.gz

echo ">>> Testing for paired-end reads with umi_discard_reads option"
# No --fastq_2 here: with one read discarded only a single output is expected.
"$meta_executable" \
  --paired true \
  --input "$meta_resources_dir/scrb_seq_fastq.1.gz,$meta_resources_dir/scrb_seq_fastq.2.gz" \
  --bc_pattern CCCCCCNNNNNNNNNN,CCCCCCNNNNNNNNNN \
  --umitools_extract_method string \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 2 \
  --fastq_1 scrb_seq_fastq.1.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is missing"
  exit 1
fi
if [ ! -s "scrb_seq_fastq.1.umi_extract.fastq.gz" ]; then
  echo "Read 1 file is empty"
  exit 1
fi
if [ -f "scrb_seq_fastq.2.umi_extract.fastq.gz" ]; then
  echo "Read 2 is not discarded"
  exit 1
fi

echo ">>> Testing for single-end reads"
"$meta_executable" \
  --paired false \
  --input "$meta_resources_dir/slim.fastq.gz" \
  --bc_pattern "^(?P<umi_1>.{3}).{4}(?P<umi_2>.{2})" \
  --umitools_extract_method regex \
  --umitools_umi_separator '_' \
  --umitools_grouping_method directional \
  --umi_discard_read 0 \
  --fastq_1 slim.umi_extract.fastq.gz

echo ">> Checking if the correct files are present"
if [ ! -f "slim.umi_extract.fastq.gz" ]; then
  echo "Trimmed reads file missing"
  exit 1
fi
if [ ! -s "slim.umi_extract.fastq.gz" ]; then
  echo "Trimmed reads file is empty"
  exit 1
fi

echo ">>> Test finished successfully"
exit 0
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
name: "umitools_prepareforquant"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: https://github.com/nf-core/rnaseq.git
|
|
||||||
paths: [modules/local/umitools_prepareforrsem.nf]
|
|
||||||
last_sha: 0a1bdcfbb498987643b74e9fccab85ccd9f2a17d
|
|
||||||
description: Fix paired-end reads in name sorted BAM file to prepare for salmon quantification
|
|
||||||
|
|
||||||
argument_groups:
|
|
||||||
- name: "Input"
|
|
||||||
arguments:
|
|
||||||
- name: "--bam"
|
|
||||||
type: file
|
|
||||||
|
|
||||||
- name: "Output"
|
|
||||||
arguments:
|
|
||||||
- name: "--output"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
default: $id.transcriptome_sorted.bam
|
|
||||||
- name: "--log"
|
|
||||||
type: file
|
|
||||||
direction: output
|
|
||||||
default: $id.$key.log
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- type: bash_script
|
|
||||||
path: script.sh
|
|
||||||
# copied from https://github.com/nf-core/rnaseq/blob/3.12.0/bin/prepare-for-rsem.py
|
|
||||||
- path: prepare-for-rsem.py
|
|
||||||
|
|
||||||
engines:
|
|
||||||
- type: docker
|
|
||||||
image: ubuntu:22.04
|
|
||||||
setup:
|
|
||||||
- type: apt
|
|
||||||
packages: [pip]
|
|
||||||
- type: python
|
|
||||||
packages: [umi_tools, pysam]
|
|
||||||
runners:
|
|
||||||
- type: executable
|
|
||||||
- type: nextflow
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
#!/bin/bash

# Run the bundled prepare-for-rsem.py helper on a BAM file.
#
# Variables read by this script:
#   meta_resources_dir  directory that contains prepare-for-rsem.py
#   par_bam             input BAM file (passed as --stdin)
#   par_output          output BAM file (passed as --stdout)
#   par_log             log file (passed as --log)

set -eo pipefail

# Every expansion is quoted so that paths containing whitespace or glob
# characters reach the Python script as a single, unmodified argument.
python3 "$meta_resources_dir/prepare-for-rsem.py" \
  --stdin="$par_bam" \
  --stdout="$par_output" \
  --log="$par_log"
|
|
||||||
@@ -4,173 +4,173 @@ description: |
|
|||||||
A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.
|
A viash sub-workflow for genome alignment and quantification stage of nf-core/rnaseq pipeline.
|
||||||
|
|
||||||
argument_groups:
|
argument_groups:
|
||||||
- name: "Input"
|
- name: "Input"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--id"
|
- name: "--id"
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
description: ID of the sample.
|
description: ID of the sample.
|
||||||
example: foo
|
example: foo
|
||||||
- name: "--fastq_1"
|
- name: "--fastq_1"
|
||||||
alternatives: [-i]
|
alternatives: [-i]
|
||||||
type: file
|
type: file
|
||||||
description: Path to the sample (or read 1 of paired end sample).
|
description: Path to the sample (or read 1 of paired end sample).
|
||||||
required: true
|
required: true
|
||||||
example: input.fastq.gz
|
example: input.fastq.gz
|
||||||
- name: "--fastq_2"
|
- name: "--fastq_2"
|
||||||
type: file
|
type: file
|
||||||
required: false
|
required: false
|
||||||
description: Path to read 2 of the sample.
|
description: Path to read 2 of the sample.
|
||||||
- name: "--strandedness"
|
- name: "--strandedness"
|
||||||
type: string
|
type: string
|
||||||
required: false
|
required: false
|
||||||
description: Sample strand-specificity. Must be one of unstranded, forward, or reverse
|
description: Sample strand-specificity. Must be one of unstranded, forward, or reverse
|
||||||
choices: [forward, reverse, unstranded]
|
choices: [forward, reverse, unstranded]
|
||||||
- name: "--gtf"
|
- name: "--gtf"
|
||||||
type: file
|
type: file
|
||||||
description: GTF file
|
description: GTF file
|
||||||
- name: "--transcript_fasta"
|
- name: "--transcript_fasta"
|
||||||
type: file
|
type: file
|
||||||
description: Fasta file of the reference transcriptome.
|
description: Fasta file of the reference transcriptome.
|
||||||
- name: "--star_index"
|
- name: "--star_index"
|
||||||
type: file
|
type: file
|
||||||
description: STAR index directory.
|
description: STAR index directory.
|
||||||
- name: "--star_ignore_sjdbgtf"
|
- name: "--star_ignore_sjdbgtf"
|
||||||
type: boolean
|
type: boolean
|
||||||
default: false
|
default: false
|
||||||
description: When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file
|
description: When using pre-built STAR indices do not re-extract and use splice junctions from the GTF file
|
||||||
- name: --star_sjdb_gtf_feature_exon
|
- name: --star_sjdb_gtf_feature_exon
|
||||||
type: string
|
type: string
|
||||||
description: Feature type in GTF file to be used as exons for building transcripts
|
description: Feature type in GTF file to be used as exons for building transcripts
|
||||||
- name: "--bam_csi_index"
|
- name: "--bam_csi_index"
|
||||||
type: boolean
|
type: boolean
|
||||||
default: false
|
default: false
|
||||||
description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.
|
description: Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes.
|
||||||
- name: "--umi_dedup_stats"
|
- name: "--umi_dedup_stats"
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Generate output stats when running "umi_tools dedup".
|
description: Generate output stats when running "umi_tools dedup".
|
||||||
default: false
|
default: false
|
||||||
- name: "--with_umi"
|
- name: "--with_umi"
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Enable UMI-based read deduplication.
|
description: Enable UMI-based read deduplication.
|
||||||
default: false
|
default: false
|
||||||
- name: "--salmon_quant_libtype"
|
- name: "--salmon_quant_libtype"
|
||||||
type: string
|
type: string
|
||||||
description: Override Salmon library type inferred based on strandedness defined in meta object.
|
description: Override Salmon library type inferred based on strandedness defined in meta object.
|
||||||
- name: "--extra_salmon_quant_args"
|
- name: "--extra_salmon_quant_args"
|
||||||
type: string
|
type: string
|
||||||
default: ''
|
default: ''
|
||||||
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
|
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
|
||||||
- name: "--gtf_group_features"
|
- name: "--gtf_group_features"
|
||||||
type: string
|
type: string
|
||||||
default: 'gene_id'
|
default: 'gene_id'
|
||||||
description: Define the attribute type used to group features in the GTF file when running Salmon.
|
description: Define the attribute type used to group features in the GTF file when running Salmon.
|
||||||
- name: "--gtf_extra_attributes"
|
- name: "--gtf_extra_attributes"
|
||||||
type: string
|
type: string
|
||||||
default: 'gene_name'
|
default: 'gene_name'
|
||||||
description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.
|
description: By default, the pipeline uses the gene_name field to obtain additional gene identifiers from the input GTF file when running Salmon.
|
||||||
- name: --extra_rsem_calculate_expression_args
|
- name: --extra_rsem_calculate_expression_args
|
||||||
type: string
|
type: string
|
||||||
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
|
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
|
||||||
- name: "--aligner"
|
- name: "--aligner"
|
||||||
type: string
|
type: string
|
||||||
description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.
|
description: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'.
|
||||||
choices: [star_salmon, star_rsem, hisat2]
|
choices: [star_salmon, star_rsem, hisat2]
|
||||||
default: "star_salmon"
|
default: "star_salmon"
|
||||||
- name: "--rsem_index"
|
- name: "--rsem_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory for pre-built RSEM index.
|
description: Path to directory for pre-built RSEM index.
|
||||||
- name: "--salmon_index"
|
- name: "--salmon_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory for pre-built Salmon index.
|
description: Path to directory for pre-built Salmon index.
|
||||||
|
|
||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--star_multiqc"
|
- name: "--star_multiqc"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id_star.log
|
default: $id_star.log
|
||||||
- name: "--genome_bam_sorted"
|
- name: "--genome_bam_sorted"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.genome.bam
|
default: $id.genome.bam
|
||||||
- name: "--genome_bam_index"
|
- name: "--genome_bam_index"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.genome.bam.bai
|
default: $id.genome.bam.bai
|
||||||
- name: "--genome_bam_stats"
|
- name: "--genome_bam_stats"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.genome.stats
|
default: $id.genome.stats
|
||||||
- name: "--genome_bam_flagstat"
|
- name: "--genome_bam_flagstat"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.genome.flagstat
|
default: $id.genome.flagstat
|
||||||
- name: "--genome_bam_idxstats"
|
- name: "--genome_bam_idxstats"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.genome.idxstats
|
default: $id.genome.idxstats
|
||||||
- name: "--transcriptome_bam"
|
- name: "--transcriptome_bam"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.transcriptome.bam
|
default: $id.transcriptome.bam
|
||||||
- name: "--transcriptome_bam_index"
|
- name: "--transcriptome_bam_index"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.transcriptome.bam.bai
|
default: $id.transcriptome.bam.bai
|
||||||
- name: "--transcriptome_bam_stats"
|
- name: "--transcriptome_bam_stats"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.transcriptome.stats
|
default: $id.transcriptome.stats
|
||||||
- name: "--transcriptome_bam_flagstat"
|
- name: "--transcriptome_bam_flagstat"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.transcriptome.flagstat
|
default: $id.transcriptome.flagstat
|
||||||
- name: "--transcriptome_bam_idxstats"
|
- name: "--transcriptome_bam_idxstats"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.transcriptome.idxstats
|
default: $id.transcriptome.idxstats
|
||||||
- name: "--quant_out_dir"
|
- name: "--quant_out_dir"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.salmon_quant
|
default: $id.salmon_quant
|
||||||
- name: "--quant_results_file"
|
- name: "--quant_results_file"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.quant.sf
|
default: $id.quant.sf
|
||||||
- name: "--salmon_multiqc"
|
- name: "--salmon_multiqc"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--rsem_counts_gene"
|
- name: "--rsem_counts_gene"
|
||||||
type: file
|
type: file
|
||||||
description: Expression counts on gene level
|
description: Expression counts on gene level
|
||||||
default: $id.genes.results
|
default: $id.genes.results
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--counts_transcripts"
|
- name: "--counts_transcripts"
|
||||||
type: file
|
type: file
|
||||||
description: Expression counts on transcript level
|
description: Expression counts on transcript level
|
||||||
default: $id.isoforms.results
|
default: $id.isoforms.results
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--rsem_multiqc"
|
- name: "--rsem_multiqc"
|
||||||
type: file
|
type: file
|
||||||
description: RSEM statistics
|
description: RSEM statistics
|
||||||
default: $id.stat
|
default: $id.stat
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--bam_star_rsem"
|
- name: "--bam_star_rsem"
|
||||||
type: file
|
type: file
|
||||||
description: BAM file generated by STAR (optional)
|
description: BAM file generated by STAR (optional)
|
||||||
default: $id.STAR.genome.bam
|
default: $id.STAR.genome.bam
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--bam_genome_rsem"
|
- name: "--bam_genome_rsem"
|
||||||
type: file
|
type: file
|
||||||
description: Genome BAM file (optional)
|
description: Genome BAM file (optional)
|
||||||
default: $id.genome.bam
|
default: $id.genome.bam
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--bam_transcript_rsem"
|
- name: "--bam_transcript_rsem"
|
||||||
type: file
|
type: file
|
||||||
description: Transcript BAM file (optional)
|
description: Transcript BAM file (optional)
|
||||||
default: $id.transcript.bam
|
default: $id.transcript.bam
|
||||||
direction: output
|
direction: output
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
- type: nextflow_script
|
- type: nextflow_script
|
||||||
@@ -190,16 +190,14 @@ dependencies:
|
|||||||
repository: biobox
|
repository: biobox
|
||||||
- name: samtools/samtools_idxstats
|
- name: samtools/samtools_idxstats
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: umitools/umitools_dedup
|
- name: umi_tools/umi_tools_dedup
|
||||||
# - name: umi_tools/umi_tools_dedup
|
repository: biobox
|
||||||
# repository: biobox
|
- name: umi_tools/umi_tools_prepareforrsem
|
||||||
- name: umitools_prepareforquant
|
repository: biobox
|
||||||
# - name: umi_tools/umi_tools_prepareforquant
|
|
||||||
# repository: biobox
|
|
||||||
- name: salmon/salmon_quant
|
- name: salmon/salmon_quant
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: rsem/rsem_calculate_expression
|
- name: rsem/rsem_calculate_expression
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
|
|
||||||
runners:
|
runners:
|
||||||
- type: executable
|
- type: executable
|
||||||
|
|||||||
@@ -84,167 +84,169 @@ workflow run_wf {
|
|||||||
key: "genome_idxstats"
|
key: "genome_idxstats"
|
||||||
)
|
)
|
||||||
|
|
||||||
//
|
//
|
||||||
// Remove duplicate reads from BAM file based on UMIs
|
// Remove duplicate reads from BAM file based on UMIs
|
||||||
//
|
//
|
||||||
|
|
||||||
// Deduplicate genome BAM file
|
// Deduplicate genome BAM file
|
||||||
| umitools_dedup.run (
|
| umi_tools_dedup.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [
|
fromState: { id, state ->
|
||||||
"paired": "paired",
|
def output_stats = state.umi_dedup_stats ? state.id :
|
||||||
"bam": "genome_bam_sorted",
|
[ paired: state.paired,
|
||||||
"bai": "genome_bam_index",
|
input: state.genome_bam,
|
||||||
"get_output_stats": "umi_dedup_stats"
|
bai: state.genome_bam_index,
|
||||||
],
|
output_stats: output_stats]
|
||||||
toState: [ "genome_bam_sorted": "output_bam" ],
|
},
|
||||||
key: "genome_deduped"
|
toState: [ "genome_bam_sorted": "output" ],
|
||||||
)
|
key: "genome_deduped"
|
||||||
| samtools_index.run (
|
)
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
| samtools_index.run (
|
||||||
fromState: [
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
"input": "genome_bam_sorted",
|
fromState: [
|
||||||
"csi": "bam_csi_index"
|
"input": "genome_bam_sorted",
|
||||||
],
|
"csi": "bam_csi_index"
|
||||||
toState: [ "genome_bam_index": "output" ],
|
],
|
||||||
key: "genome_deduped"
|
toState: [ "genome_bam_index": "output" ],
|
||||||
)
|
key: "genome_deduped"
|
||||||
| samtools_stats.run (
|
)
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
| samtools_stats.run (
|
||||||
fromState: [
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
"input": "genome_bam_sorted",
|
fromState: [
|
||||||
"bai": "genome_bam_index",
|
"input": "genome_bam_sorted",
|
||||||
"fasta": "fasta"
|
"bai": "genome_bam_index",
|
||||||
],
|
"fasta": "fasta"
|
||||||
toState: [ "genome_bam_stats": "output" ],
|
],
|
||||||
key: "genome_deduped_stats"
|
toState: [ "genome_bam_stats": "output" ],
|
||||||
)
|
key: "genome_deduped_stats"
|
||||||
| samtools_flagstat.run (
|
)
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
| samtools_flagstat.run (
|
||||||
fromState: [
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
"bam": "genome_bam_sorted",
|
fromState: [
|
||||||
"bai": "genome_bam_index",
|
"bam": "genome_bam_sorted",
|
||||||
"fasta": "fasta"
|
"bai": "genome_bam_index",
|
||||||
],
|
"fasta": "fasta"
|
||||||
toState: [ "genome_bam_flagstat": "output" ],
|
],
|
||||||
key: "genome_deduped_flagstat"
|
toState: [ "genome_bam_flagstat": "output" ],
|
||||||
)
|
key: "genome_deduped_flagstat"
|
||||||
| samtools_idxstats.run(
|
)
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
| samtools_idxstats.run(
|
||||||
fromState: [
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
"bam": "genome_bam_sorted",
|
fromState: [
|
||||||
"bai": "genome_bam_index",
|
"bam": "genome_bam_sorted",
|
||||||
"fasta": "fasta",
|
"bai": "genome_bam_index",
|
||||||
],
|
"fasta": "fasta",
|
||||||
toState: [ "genome_bam_idxstats": "output" ],
|
],
|
||||||
key: "genome_deduped_idxstats"
|
toState: [ "genome_bam_idxstats": "output" ],
|
||||||
)
|
key: "genome_deduped_idxstats"
|
||||||
|
)
|
||||||
|
|
||||||
// Deduplicate transcriptome BAM file
|
// Deduplicate transcriptome BAM file
|
||||||
|
|
||||||
| samtools_sort.run (
|
| samtools_sort.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [ "input": "transcriptome_bam" ],
|
fromState: [ "input": "transcriptome_bam" ],
|
||||||
toState: [ "transcriptome_bam": "output" ],
|
toState: [ "transcriptome_bam": "output" ],
|
||||||
key: "transcriptome_sorted"
|
key: "transcriptome_sorted"
|
||||||
)
|
)
|
||||||
| samtools_index.run (
|
| samtools_index.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [
|
|
||||||
"input": "transcriptome_bam",
|
|
||||||
"csi": "bam_csi_index"
|
|
||||||
],
|
|
||||||
toState: [ "transcriptome_bam_index": "output" ],
|
|
||||||
key: "transcriptome_sorted"
|
|
||||||
)
|
|
||||||
| samtools_stats.run (
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [
|
|
||||||
"input": "transcriptome_bam",
|
|
||||||
"bai": "transcriptome_bam_index",
|
|
||||||
],
|
|
||||||
toState: [ "transcriptome_bam_stats": "output" ],
|
|
||||||
key: "transcriptome_stats"
|
|
||||||
)
|
|
||||||
| samtools_flagstat.run (
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [
|
|
||||||
"bam": "transcriptome_bam",
|
|
||||||
"bai": "transcriptome_bam_index"
|
|
||||||
],
|
|
||||||
toState: [ "transcriptome_bam_flagstat": "output" ],
|
|
||||||
key: "transcriptome_flagstat"
|
|
||||||
)
|
|
||||||
| samtools_idxstats.run(
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [
|
|
||||||
"bam": "transcriptome_bam",
|
|
||||||
"bai": "transcriptome_bam_index"
|
|
||||||
],
|
|
||||||
toState: [ "transcriptome_bam_idxstats": "output" ],
|
|
||||||
key: "transcriptome_idxstats"
|
|
||||||
)
|
|
||||||
|
|
||||||
| umitools_dedup.run (
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [
|
|
||||||
"paired": "paired",
|
|
||||||
"bam": "transcriptome_bam",
|
|
||||||
"bai": "transcriptome_bam_index",
|
|
||||||
"get_output_stats": "umi_dedup_stats",
|
|
||||||
],
|
|
||||||
toState: [ "transcriptome_bam_deduped": "output_bam" ],
|
|
||||||
key: "transcriptome_deduped"
|
|
||||||
)
|
|
||||||
| samtools_sort.run (
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
|
||||||
fromState: [ "input": "transcriptome_bam_deduped" ],
|
|
||||||
toState: [ "transcriptome_bam": "output" ],
|
|
||||||
key: "transcriptome_deduped_sorted"
|
|
||||||
)
|
|
||||||
| samtools_index.run (
|
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "transcriptome_bam",
|
"input": "transcriptome_bam",
|
||||||
"csi": "bam_csi_index"
|
"csi": "bam_csi_index"
|
||||||
],
|
],
|
||||||
toState: [ "transcriptome_bam_index": "output" ],
|
toState: [ "transcriptome_bam_index": "output" ],
|
||||||
|
key: "transcriptome_sorted"
|
||||||
|
)
|
||||||
|
| samtools_stats.run (
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: [
|
||||||
|
"input": "transcriptome_bam",
|
||||||
|
"bai": "transcriptome_bam_index",
|
||||||
|
],
|
||||||
|
toState: [ "transcriptome_bam_stats": "output" ],
|
||||||
|
key: "transcriptome_stats"
|
||||||
|
)
|
||||||
|
| samtools_flagstat.run (
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: [
|
||||||
|
"bam": "transcriptome_bam",
|
||||||
|
"bai": "transcriptome_bam_index"
|
||||||
|
],
|
||||||
|
toState: [ "transcriptome_bam_flagstat": "output" ],
|
||||||
|
key: "transcriptome_flagstat"
|
||||||
|
)
|
||||||
|
| samtools_idxstats.run(
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: [
|
||||||
|
"bam": "transcriptome_bam",
|
||||||
|
"bai": "transcriptome_bam_index"
|
||||||
|
],
|
||||||
|
toState: [ "transcriptome_bam_idxstats": "output" ],
|
||||||
|
key: "transcriptome_idxstats"
|
||||||
|
)
|
||||||
|
|
||||||
|
| umi_tools_dedup.run (
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: { id, state ->
|
||||||
|
def output_stats = state.umi_dedup_stats ? state.id :
|
||||||
|
[ paired: state.paired,
|
||||||
|
input: state.transcriptome_bam,
|
||||||
|
bai: state.transcriptome_bam_index,
|
||||||
|
output_stats: output_stats]
|
||||||
|
},
|
||||||
|
toState: [ "transcriptome_bam_deduped": "output" ],
|
||||||
|
key: "transcriptome_deduped"
|
||||||
|
)
|
||||||
|
| samtools_sort.run (
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: [ "input": "transcriptome_bam_deduped" ],
|
||||||
|
toState: [ "transcriptome_bam": "output" ],
|
||||||
key: "transcriptome_deduped_sorted"
|
key: "transcriptome_deduped_sorted"
|
||||||
)
|
)
|
||||||
| samtools_stats.run (
|
| samtools_index.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "transcriptome_bam",
|
"input": "transcriptome_bam",
|
||||||
"bai": "transcriptome_bam_index"
|
"csi": "bam_csi_index"
|
||||||
],
|
],
|
||||||
toState: [ "transcriptome_bam_stats": "output" ],
|
toState: [ "transcriptome_bam_index": "output" ],
|
||||||
key: "transcriptome_deduped_stats"
|
key: "transcriptome_deduped_sorted"
|
||||||
)
|
)
|
||||||
| samtools_flagstat.run (
|
| samtools_stats.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [
|
fromState: [
|
||||||
"bam": "transcriptome_bam",
|
"input": "transcriptome_bam",
|
||||||
"bai": "transcriptome_bam_index"
|
"bai": "transcriptome_bam_index"
|
||||||
],
|
],
|
||||||
toState: [ "transcriptome_bam_flagstat": "output" ],
|
toState: [ "transcriptome_bam_stats": "output" ],
|
||||||
key: "transcriptome_deduped_flagstat"
|
key: "transcriptome_deduped_stats"
|
||||||
)
|
)
|
||||||
| samtools_idxstats.run(
|
| samtools_flagstat.run (
|
||||||
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
fromState: [
|
fromState: [
|
||||||
"bam": "transcriptome_bam",
|
"bam": "transcriptome_bam",
|
||||||
"bai": "transcriptome_bam_index"
|
"bai": "transcriptome_bam_index"
|
||||||
],
|
],
|
||||||
toState: [ "transcriptome_bam_idxstats": "output" ],
|
toState: [ "transcriptome_bam_flagstat": "output" ],
|
||||||
key: "transcriptome_deduped_idxstats"
|
key: "transcriptome_deduped_flagstat"
|
||||||
)
|
)
|
||||||
|
| samtools_idxstats.run(
|
||||||
|
runIf: { id, state -> state.with_umi && state.aligner == 'star_salmon' },
|
||||||
|
fromState: [
|
||||||
|
"bam": "transcriptome_bam",
|
||||||
|
"bai": "transcriptome_bam_index"
|
||||||
|
],
|
||||||
|
toState: [ "transcriptome_bam_idxstats": "output" ],
|
||||||
|
key: "transcriptome_deduped_idxstats"
|
||||||
|
)
|
||||||
|
|
||||||
// Fix paired-end reads in name sorted BAM file
|
// Fix paired-end reads in name sorted BAM file
|
||||||
| umitools_prepareforquant.run (
|
| umi_tools_prepareforrsem.run (
|
||||||
runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' },
|
runIf: { id, state -> state.with_umi && state.paired && state.aligner == 'star_salmon' },
|
||||||
fromState: [ "bam": "transcriptome_bam" ],
|
fromState: [ "input": "transcriptome_bam" ],
|
||||||
toState: [ "transcriptome_bam": "output" ]
|
toState: [ "transcriptome_bam": "output" ]
|
||||||
)
|
)
|
||||||
|
|
||||||
// Infer lib-type for salmon quant
|
// Infer lib-type for salmon quant
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
@@ -281,78 +283,91 @@ workflow run_wf {
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state
|
def mod_state = (state.aligner == 'star_salmon') ? state + [salmon_multiqc: state.quant_out_dir] : state
|
||||||
[ id, mod_state ]
|
[ id, mod_state ]
|
||||||
}
|
}
|
||||||
|
|
||||||
| rsem_calculate_expression.run (
|
| rsem_calculate_expression.run (
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
fromState: [
|
fromState: [
|
||||||
"id": "id",
|
"id": "id",
|
||||||
"strandedness": "strandedness",
|
"strandedness": "strandedness",
|
||||||
"paired": "paired",
|
"paired": "paired",
|
||||||
"input": "input",
|
"input": "input",
|
||||||
"index": "rsem_index",
|
"index": "rsem_index",
|
||||||
"extra_args": "extra_rsem_calculate_expression_args"
|
"counts_gene": "rsem_counts_gene",
|
||||||
],
|
"counts_transcripts": "rsem_counts_transcripts",
|
||||||
toState: [
|
"stat": "rsem_multiqc",
|
||||||
"rsem_counts_gene": "counts_gene",
|
"logs": "star_multiqc",
|
||||||
"rsem_counts_transcripts": "counts_transcripts",
|
"bam_star": "bam_star_rsem",
|
||||||
"rsem_multiqc": "stat",
|
"bam_genome": "bam_genome_rsem",
|
||||||
"star_multiqc": "logs",
|
"bam_transcript": "bam_transcript_rsem"
|
||||||
"bam_star_rsem": "bam_star",
|
],
|
||||||
"bam_genome_rsem": "bam_genome",
|
args: [
|
||||||
"bam_transcript_rsem": "bam_transcript"
|
star: true,
|
||||||
]
|
star_output_genome_bam: true,
|
||||||
)
|
star_gzipped_read_file: true,
|
||||||
|
estimate_rspd: true,
|
||||||
// RSEM_Star BAM
|
seed: 1
|
||||||
| samtools_sort.run (
|
],
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
toState: [
|
||||||
fromState: ["input": "bam_star_rsem"],
|
"rsem_counts_gene": "counts_gene",
|
||||||
toState: ["genome_bam_sorted": "output"],
|
"rsem_counts_transcripts": "counts_transcripts",
|
||||||
key: "genome_sorted"
|
"rsem_multiqc": "stat",
|
||||||
)
|
"star_multiqc": "logs",
|
||||||
| samtools_index.run (
|
"bam_star_rsem": "bam_star",
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
"bam_genome_rsem": "bam_genome",
|
||||||
fromState: [
|
"bam_transcript_rsem": "bam_transcript"
|
||||||
"input": "genome_bam_sorted",
|
]
|
||||||
"csi": "bam_csi_index"
|
)
|
||||||
],
|
|
||||||
toState: [ "genome_bam_index": "output" ],
|
// RSEM_Star BAM
|
||||||
key: "genome_sorted"
|
| samtools_sort.run (
|
||||||
)
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
| samtools_stats.run (
|
fromState: ["input": "bam_star_rsem"],
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
toState: ["genome_bam_sorted": "output"],
|
||||||
fromState: [
|
key: "genome_sorted"
|
||||||
"input": "genome_bam_sorted",
|
)
|
||||||
"bai": "genome_bam_index",
|
| samtools_index.run (
|
||||||
"fasta": "fasta"
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
],
|
fromState: [
|
||||||
toState: [ "genome_bam_stats": "output" ],
|
"input": "genome_bam_sorted",
|
||||||
key: "genome_stats"
|
"csi": "bam_csi_index"
|
||||||
)
|
],
|
||||||
| samtools_flagstat.run (
|
toState: [ "genome_bam_index": "output" ],
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
key: "genome_sorted"
|
||||||
fromState: [
|
)
|
||||||
"bam": "genome_bam_sorted",
|
| samtools_stats.run (
|
||||||
"bai": "genome_bam_index",
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
"fasta": "fasta"
|
fromState: [
|
||||||
],
|
"input": "genome_bam_sorted",
|
||||||
toState: [ "genome_bam_flagstat": "output" ],
|
"bai": "genome_bam_index",
|
||||||
key: "genome_flagstat"
|
"fasta": "fasta"
|
||||||
)
|
],
|
||||||
| samtools_idxstats.run(
|
toState: [ "genome_bam_stats": "output" ],
|
||||||
runIf: { id, state -> state.aligner == 'star_rsem' },
|
key: "genome_stats"
|
||||||
fromState: [
|
)
|
||||||
"bam": "genome_bam_sorted",
|
| samtools_flagstat.run (
|
||||||
"bai": "genome_bam_index",
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
"fasta": "fasta"
|
fromState: [
|
||||||
],
|
"bam": "genome_bam_sorted",
|
||||||
toState: [ "genome_bam_idxstats": "output" ],
|
"bai": "genome_bam_index",
|
||||||
key: "genome_idxstats"
|
"fasta": "fasta"
|
||||||
)
|
],
|
||||||
|
toState: [ "genome_bam_flagstat": "output" ],
|
||||||
|
key: "genome_flagstat"
|
||||||
|
)
|
||||||
|
| samtools_idxstats.run(
|
||||||
|
runIf: { id, state -> state.aligner == 'star_rsem' },
|
||||||
|
fromState: [
|
||||||
|
"bam": "genome_bam_sorted",
|
||||||
|
"bai": "genome_bam_index",
|
||||||
|
"fasta": "fasta"
|
||||||
|
],
|
||||||
|
toState: [ "genome_bam_idxstats": "output" ],
|
||||||
|
key: "genome_idxstats"
|
||||||
|
)
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# v;iash ns build --setup cb --parallel
|
viash ns build --setup cb --parallel
|
||||||
|
|
||||||
# Split error message from standard output
|
# Split error message from standard output
|
||||||
# viash ns list > /dev/null
|
# viash ns list > /dev/null
|
||||||
@@ -37,7 +37,6 @@ nextflow run target/nextflow/workflows/genome_alignment_and_quant/main.nf \
|
|||||||
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
--rsem_index testData/minimal_test/reference/rsem_index \
|
--rsem_index testData/minimal_test/reference/rsem_index \
|
||||||
--aligner star_rsem \
|
--aligner star_rsem \
|
||||||
--extra_rsem_calculate_expression_args "--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1" \
|
|
||||||
-profile docker \
|
-profile docker \
|
||||||
-resume
|
-resume
|
||||||
|
|
||||||
|
|||||||
@@ -50,10 +50,6 @@ argument_groups:
|
|||||||
- name: "--stringtie_ignore_gtf"
|
- name: "--stringtie_ignore_gtf"
|
||||||
type: boolean
|
type: boolean
|
||||||
description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.
|
description: Perform reference-guided de novo assembly of transcripts using StringTie, i.e. don't restrict to those in GTF file.
|
||||||
- name: "--extra_bedtools_args"
|
|
||||||
type: string
|
|
||||||
default: ''
|
|
||||||
description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--bam_csi_index"
|
- name: "--bam_csi_index"
|
||||||
type: boolean
|
type: boolean
|
||||||
default: false
|
default: false
|
||||||
@@ -151,9 +147,8 @@ dependencies:
|
|||||||
- name: samtools/samtools_idxstats
|
- name: samtools/samtools_idxstats
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: stringtie
|
- name: stringtie
|
||||||
# - name: bedtools/bedtools_genomecov
|
- name: bedtools/bedtools_genomecov
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: bedtools_genomecov
|
|
||||||
- name: ucsc/bedclip
|
- name: ucsc/bedclip
|
||||||
- name: ucsc/bedgraphtobigwig
|
- name: ucsc/bedgraphtobigwig
|
||||||
|
|
||||||
|
|||||||
@@ -81,18 +81,35 @@ workflow run_wf {
|
|||||||
|
|
||||||
// Genome-wide coverage with BEDTools
|
// Genome-wide coverage with BEDTools
|
||||||
|
|
||||||
| bedtools_genomecov.run (
|
| bedtools_genomecov.run (
|
||||||
runIf: { id, state -> !state.skip_bigwig },
|
runIf: { id, state -> !state.skip_bigwig },
|
||||||
fromState: [
|
fromState: [
|
||||||
"strandedness": "strandedness",
|
"input_bam": "processed_genome_bam",
|
||||||
"bam": "processed_genome_bam",
|
],
|
||||||
"extra_bedtools_args": "extra_bedtools_args"
|
args: [
|
||||||
],
|
split: true,
|
||||||
toState: [
|
du: true,
|
||||||
"bedgraph_forward": "bedgraph_forward",
|
bed_graph: true,
|
||||||
"bedgraph_reverse": "bedgraph_reverse"
|
strand: "+"
|
||||||
]
|
],
|
||||||
)
|
toState: [ "bedgraph_forward": "output" ],
|
||||||
|
key: "bedtools_genomecov_forward"
|
||||||
|
)
|
||||||
|
|
||||||
|
| bedtools_genomecov.run (
|
||||||
|
runIf: { id, state -> !state.skip_bigwig },
|
||||||
|
fromState: [
|
||||||
|
"input_bam": "processed_genome_bam",
|
||||||
|
],
|
||||||
|
args: [
|
||||||
|
split: true,
|
||||||
|
du: true,
|
||||||
|
bed_graph: true,
|
||||||
|
strand: "-"
|
||||||
|
],
|
||||||
|
toState: [ "bedgraph_reverse": "output" ],
|
||||||
|
key: "bedtools_genomecov_reverse"
|
||||||
|
)
|
||||||
|
|
||||||
| bedclip.run (
|
| bedclip.run (
|
||||||
runIf: { id, state -> !state.skip_bigwig },
|
runIf: { id, state -> !state.skip_bigwig },
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# viash ns build --setup cb --parallel
|
viash ns build --setup cb --parallel
|
||||||
|
|
||||||
nextflow run target/nextflow/workflows/post_processing/main.nf \
|
nextflow run target/nextflow/workflows/post_processing/main.nf \
|
||||||
--publish_dir "testData/paired_end_test" \
|
--publish_dir "testData/paired_end_test" \
|
||||||
@@ -14,8 +14,6 @@ nextflow run target/nextflow/workflows/post_processing/main.nf \
|
|||||||
--chrom_sizes "testData/test_output/reference_genome.fasta.sizes" \
|
--chrom_sizes "testData/test_output/reference_genome.fasta.sizes" \
|
||||||
--star_multiqc "testData/paired_end_test/SRR6357070.star_align.log" \
|
--star_multiqc "testData/paired_end_test/SRR6357070.star_align.log" \
|
||||||
--extra_picard_args "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" \
|
--extra_picard_args "--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp" \
|
||||||
--extra_bedtools_args "-split -du" \
|
|
||||||
--extra_featurecounts_args "-B -C" \
|
|
||||||
--gencode false \
|
--gencode false \
|
||||||
--biotype gene_biotype \
|
--biotype gene_biotype \
|
||||||
-profile docker \
|
-profile docker \
|
||||||
|
|||||||
@@ -29,9 +29,6 @@ argument_groups:
|
|||||||
- name: "--bbsplit_index"
|
- name: "--bbsplit_index"
|
||||||
type: file
|
type: file
|
||||||
description: BBsplit index
|
description: BBsplit index
|
||||||
- name: "--bbsplit_fasta_list"
|
|
||||||
type: file
|
|
||||||
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
|
|
||||||
- name: "--ribo_database_manifest"
|
- name: "--ribo_database_manifest"
|
||||||
type: file
|
type: file
|
||||||
description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.
|
description: Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA.
|
||||||
@@ -110,12 +107,6 @@ argument_groups:
|
|||||||
description: Specify the trimming tool to use.
|
description: Specify the trimming tool to use.
|
||||||
choices: [ "trimgalore", "fastp"]
|
choices: [ "trimgalore", "fastp"]
|
||||||
default: "trimgalore"
|
default: "trimgalore"
|
||||||
- name: "--extra_trimgalore_args"
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.
|
|
||||||
# - name: "--extra_fastp_args"
|
|
||||||
# type: string
|
|
||||||
# description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--min_trimmed_reads"
|
- name: "--min_trimmed_reads"
|
||||||
type: integer
|
type: integer
|
||||||
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
|
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
|
||||||
@@ -129,31 +120,15 @@ argument_groups:
|
|||||||
description: Save the trimmed FastQ files in the results directory.
|
description: Save the trimmed FastQ files in the results directory.
|
||||||
default: false
|
default: false
|
||||||
|
|
||||||
- name: "Alignment options"
|
|
||||||
arguments:
|
|
||||||
- name: "--extra_salmon_quant_args"
|
|
||||||
type: string
|
|
||||||
default: ''
|
|
||||||
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
|
|
||||||
|
|
||||||
- name: "Read filtering options"
|
- name: "Read filtering options"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--skip_bbsplit"
|
- name: "--skip_bbsplit"
|
||||||
type: boolean_true
|
type: boolean_true
|
||||||
description: Skip BBSplit for removal of non-reference genome reads.
|
description: Skip BBSplit for removal of non-reference genome reads.
|
||||||
# default: true
|
|
||||||
- name: "--remove_ribo_rna"
|
- name: "--remove_ribo_rna"
|
||||||
type: boolean_true
|
type: boolean_true
|
||||||
description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA.
|
description: Enable the removal of reads derived from ribosomal RNA using SortMeRNA.
|
||||||
# default: false
|
|
||||||
|
|
||||||
- name: "Other options"
|
|
||||||
arguments:
|
|
||||||
- name: "--extra_fq_subsample_args"
|
|
||||||
type: string
|
|
||||||
default: '--record-count 1000000 --seed 1'
|
|
||||||
description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.
|
|
||||||
|
|
||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--qc_output1"
|
- name: "--qc_output1"
|
||||||
@@ -162,78 +137,78 @@ argument_groups:
|
|||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
description: Path to output directory
|
description: Path to output directory
|
||||||
default: $id.read_1.fastq
|
default: ${id}_r1.fastq.gz
|
||||||
- name: "--qc_output2"
|
- name: "--qc_output2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
description: Path to output directory
|
description: Path to output directory
|
||||||
default: $id.read_2.fastq
|
default: ${id}_r2.fastq.gz
|
||||||
- name: "--fastqc_html_1"
|
- name: "--fastqc_html_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC HTML report for read 1.
|
description: FastQC HTML report for read 1.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_1.fastqc.html
|
default: ${id}_r1.fastqc.html
|
||||||
- name: "--fastqc_html_2"
|
- name: "--fastqc_html_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC HTML report for read 2.
|
description: FastQC HTML report for read 2.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_2.fastqc.html
|
default: ${id}_r2.fastqc.html
|
||||||
- name: "--fastqc_zip_1"
|
- name: "--fastqc_zip_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC report archive for read 1.
|
description: FastQC report archive for read 1.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_1.fastqc.zip
|
default: ${id}_r1.fastqc.zip
|
||||||
- name: "--fastqc_zip_2"
|
- name: "--fastqc_zip_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC report archive for read 2.
|
description: FastQC report archive for read 2.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_2.fastqc.zip
|
default: ${id}_r2.fastqc.zip
|
||||||
- name: "--trim_log_1"
|
- name: "--trim_log_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_1.trimming_report.txt
|
default: ${id}_r1.trimming_report.txt
|
||||||
- name: "--trim_log_2"
|
- name: "--trim_log_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_2.trimming_report.txt
|
default: ${id}_r2.trimming_report.txt
|
||||||
- name: "--trim_html_1"
|
- name: "--trim_html_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_1.trimmed_fastqc.html
|
default: ${id}_r1.trimmed_fastqc.html
|
||||||
- name: "--trim_html_2"
|
- name: "--trim_html_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_2.trimmed_fastqc.html
|
default: ${id}_r2.trimmed_fastqc.html
|
||||||
- name: "--trim_zip_1"
|
- name: "--trim_zip_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_1.trimmed_fastqc.zip
|
default: ${id}_r1.trimmed_fastqc.zip
|
||||||
- name: "--trim_zip_2"
|
- name: "--trim_zip_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: $id.read_2.trimmed_fastqc.zip
|
default: ${id}_r2.trimmed_fastqc.zip
|
||||||
- name: "--sortmerna_log"
|
- name: "--sortmerna_log"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
@@ -267,20 +242,19 @@ resources:
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: fastqc
|
- name: fastqc
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: umitools/umitools_extract
|
|
||||||
- name: umi_tools/umi_tools_extract
|
- name: umi_tools/umi_tools_extract
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: trimgalore
|
- name: trimgalore
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: bbmap_bbsplit
|
- name: bbmap/bbmap_bbsplit
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: sortmerna
|
- name: sortmerna
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: fastp
|
- name: fastp
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: fq_subsample
|
- name: fq_subsample
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
- name: salmon/salmon_quant
|
- name: salmon/salmon_quant
|
||||||
repository: biobox
|
repository: biobox
|
||||||
|
|
||||||
|
|||||||
@@ -12,48 +12,58 @@ workflow run_wf {
|
|||||||
[ id, state + [paired: paired, input: input] ]
|
[ id, state + [paired: paired, input: input] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform QC on input fastq files
|
|
||||||
| fastqc.run (
|
| fastqc.run (
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_fastqc },
|
runIf: { id, state -> !state.skip_qc && !state.skip_fastqc },
|
||||||
fromState: { id, state ->
|
fromState: [ "input": "input" ],
|
||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
toState: {id, output_state, state ->
|
||||||
[ paired: state.paired,
|
def newKeys = [
|
||||||
input: input ]
|
"fastqc_html_1":output_state["html"][0],
|
||||||
},
|
"fastqc_html_2": output_state["html"][1],
|
||||||
toState: [
|
"fastqc_zip_1": output_state["zip"][0],
|
||||||
"fastqc_html_1": "fastqc_html_1",
|
"fastqc_zip_2": output_state["zip"][1]
|
||||||
"fastqc_html_2": "fastqc_html_2",
|
]
|
||||||
"fastqc_zip_1": "fastqc_zip_1",
|
def new_state = state + newKeys
|
||||||
"fastqc_zip_2": "fastqc_zip_2"
|
return new_state
|
||||||
]
|
},
|
||||||
|
args: [html: "*.html", zip: "*.zip"]
|
||||||
)
|
)
|
||||||
|
|
||||||
// Extract UMIs from fastq files and discard read 1 or read 2 if required
|
// Extract UMIs from fastq files and discard read 1 or read 2 if required
|
||||||
| umitools_extract.run (
|
| umi_tools_extract.run (
|
||||||
runIf: { id, state -> state.with_umi && !state.skip_umi_extract },
|
runIf: { id, state -> state.with_umi && !state.skip_umi_extract },
|
||||||
fromState: { id, state ->
|
fromState: { id, state ->
|
||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def bc_pattern2 = state.paired ? state.umitools_bc_pattern2 : state.remove(state.umitools_bc_pattern2)
|
||||||
def bc_pattern = state.paired ? [ state.umitools_bc_pattern, state.umitools_bc_pattern2 ] : [ state.umitools_bc_pattern ]
|
def output = "${id}.r1.fastq.gz"
|
||||||
[ paired: state.paired,
|
def read2_out = state.paired ? "${id}.r2.fastq.gz" : state.remove(state.fastq_2)
|
||||||
input: input,
|
[ input: state.fastq_1,
|
||||||
bc_pattern: bc_pattern,
|
read2_in: state.fastq_2,
|
||||||
umi_discard_read: state.umi_discard_read ]
|
bc_pattern: state.umitools_bc_pattern,
|
||||||
|
bc_pattern2: bc_pattern2,
|
||||||
|
extract_method: state.umitools_extract_method,
|
||||||
|
umi_separator: state.umitools_umi_separator,
|
||||||
|
grouping_method: state.umitools_grouping_method,
|
||||||
|
output: output,
|
||||||
|
read2_out: read2_out ]
|
||||||
},
|
},
|
||||||
toState: [
|
toState: [
|
||||||
"fastq_1": "fastq_1",
|
"fastq_1": "output",
|
||||||
"fastq_2": "fastq_2"
|
"fastq_2": "read2_out"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
// Discard read if required
|
// Discard read if required
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def paired = state.paired
|
def paired = state.paired
|
||||||
|
def fastq_1 = state.fastq_1
|
||||||
def fastq_2 = state.fastq_2
|
def fastq_2 = state.fastq_2
|
||||||
if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) {
|
if (paired && state.with_umi && !state.skip_umi_extract && state.umi_discard_read != 0) {
|
||||||
fastq_2 = state.remove(state.fastq_2)
|
if (state.umi_discard_read == 1) {
|
||||||
|
fastq_1 = fastq_2
|
||||||
|
}
|
||||||
|
fastq_2 = state.remove(state.fastq_2)
|
||||||
paired = false
|
paired = false
|
||||||
}
|
}
|
||||||
[ id, state + [paired: paired, fastq_2: fastq_2] ]
|
[ id, state + [paired: paired, fastq_1: fastq_1, fastq_2: fastq_2] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim reads using Trim galore!
|
// Trim reads using Trim galore!
|
||||||
@@ -63,8 +73,11 @@ workflow run_wf {
|
|||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
||||||
[ paired: state.paired,
|
[ paired: state.paired,
|
||||||
input: input,
|
input: input,
|
||||||
min_trimmed_reads: state.min_trimmed_reads ]
|
min_trimmed_reads: state.min_trimmed_reads,
|
||||||
|
trimmed_r1: state.qc_output1,
|
||||||
|
trimmed_r2: state.qc_output2 ]
|
||||||
},
|
},
|
||||||
|
args: [gzip: true, fastqc: true],
|
||||||
toState: [
|
toState: [
|
||||||
"fastq_1": "trimmed_r1",
|
"fastq_1": "trimmed_r1",
|
||||||
"fastq_2": "trimmed_r2",
|
"fastq_2": "trimmed_r2",
|
||||||
@@ -74,21 +87,22 @@ workflow run_wf {
|
|||||||
"trim_zip_2": "trimmed_fastqc_zip_2",
|
"trim_zip_2": "trimmed_fastqc_zip_2",
|
||||||
"trim_html_1": "trimmed_fastqc_html_1",
|
"trim_html_1": "trimmed_fastqc_html_1",
|
||||||
"trim_html_2": "trimmed_fastqc_html_2"
|
"trim_html_2": "trimmed_fastqc_html_2"
|
||||||
],
|
]
|
||||||
args: [gzip: true, fastqc: true]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Trim reads using fastp
|
// Trim reads using fastp
|
||||||
| fastp.run(
|
| fastp.run(
|
||||||
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
|
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
|
||||||
fromState: [
|
fromState: { id, state ->
|
||||||
"in1": "fastq_1",
|
def outputState = state.paired ? [out1: state.qc_output1, out2: state.qc_output2] : [out1: state.qc_output1, out2: state.remove(state.qc_output2)]
|
||||||
"in2": "fastq_2",
|
[input_1: state.fastq_1, input_2: state.fastq_2] + outputState
|
||||||
"merge": "fastp_save_merged",
|
[ in1: state.fastq_1,
|
||||||
"interleaved_in": "interleaved_reads",
|
in2: state.fastq_2,
|
||||||
"detect_adapter_for_pe": "fastp_pe_detect_adapter",
|
merge: state.fastp_save_merged,
|
||||||
"adapter_fasta": "fastp_adapter_fasta"
|
interleaved_in: state.interleaved_reads,
|
||||||
],
|
detect_adapter_for_pe: state.paired,
|
||||||
|
adapter_fasta: state.fastp_adapter_fasta ] + outputState
|
||||||
|
},
|
||||||
toState: [
|
toState: [
|
||||||
"fastq_1": "out1",
|
"fastq_1": "out1",
|
||||||
"fastq_2": "out2",
|
"fastq_2": "out2",
|
||||||
@@ -102,19 +116,23 @@ workflow run_wf {
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Perform FASTQC on reads trimmed using fastp
|
// Perform FASTQC on reads trimmed using fastp
|
||||||
| fastqc.run(
|
| fastqc.run (
|
||||||
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
|
runIf: { id, state -> !state.skip_trimming && state.trimmer == "fastp" },
|
||||||
fromState: { id, state ->
|
fromState: { id, state ->
|
||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
||||||
[ paired: state.paired,
|
[ input: input ]
|
||||||
input: input ]
|
},
|
||||||
},
|
toState: {id, output_state, state ->
|
||||||
toState: [
|
def newKeys = [
|
||||||
"trim_html_1": "fastqc_html_1",
|
"trim_html_1":output_state["html"][0],
|
||||||
"trim_html_2": "fastqc_html_2",
|
"trim_html_2": output_state["html"][1],
|
||||||
"trim_zip_1": "fastqc_zip_1",
|
"trim_zip_1": output_state["zip"][0],
|
||||||
"trim_zip_2": "fastqc_zip_2"
|
"trim_zip_2": output_state["zip"][1]
|
||||||
],
|
]
|
||||||
|
def new_state = state + newKeys
|
||||||
|
return new_state
|
||||||
|
},
|
||||||
|
args: [html: "*.html", zip: "*.zip"],
|
||||||
key: "fastqc_trimming"
|
key: "fastqc_trimming"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -125,7 +143,7 @@ workflow run_wf {
|
|||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
||||||
[ paired: state.paired,
|
[ paired: state.paired,
|
||||||
input: input,
|
input: input,
|
||||||
built_bbsplit_index: state.bbsplit_index ]
|
build: state.bbsplit_index ]
|
||||||
},
|
},
|
||||||
args: ["only_build_index": false],
|
args: ["only_build_index": false],
|
||||||
toState: [
|
toState: [
|
||||||
@@ -141,27 +159,44 @@ workflow run_wf {
|
|||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
||||||
def filePaths = state.ribo_database_manifest.readLines()
|
def filePaths = state.ribo_database_manifest.readLines()
|
||||||
def refs = filePaths.collect { it }
|
def refs = filePaths.collect { it }
|
||||||
[ paired: state.paired,
|
def other = "${id}_non_rRNA_reads/"
|
||||||
|
[ paired_in: state.paired,
|
||||||
input: input,
|
input: input,
|
||||||
ribo_database_manifest: refs ]
|
ref: refs,
|
||||||
|
out2: state.paired,
|
||||||
|
other: other ]
|
||||||
},
|
},
|
||||||
toState: [
|
args: [fastx: true, num_alignments: 1],
|
||||||
"fastq_1": "fastq_1",
|
toState: { id, output_state, state ->
|
||||||
"fastq_2": "fastq_2",
|
def newKeys = [
|
||||||
"sortmerna_log": "sortmerna_log"
|
"sortmerna_output": output_state["other"],
|
||||||
]
|
"sortmerna_log": output_state["log"]
|
||||||
|
]
|
||||||
|
def new_state = state + newKeys
|
||||||
|
return new_state
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
| map { id, state ->
|
||||||
|
if (state.remove_ribo_rna) {
|
||||||
|
def fastq_1 = state.sortmerna_output.listFiles().find{it.name == "other_fwd.fq.gz"}
|
||||||
|
def fastq_2 = state.sortmerna_output.listFiles().find{it.name == "other_rev.fq.gz"}
|
||||||
|
[ id, state + [fastq_1: fastq_1, fastq_2: fastq_2] ]
|
||||||
|
} else {
|
||||||
|
[ id, state ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness
|
// Sub-sample FastQ files and pseudo-align with Salmon to auto-infer strandedness
|
||||||
| fq_subsample.run (
|
| fq_subsample.run (
|
||||||
runIf: { id, state -> state.strandedness == 'auto' },
|
runIf: { id, state -> state.strandedness == 'auto' },
|
||||||
fromState: { id, state ->
|
fromState: { id, state ->
|
||||||
def input = state.paired ? [ state.fastq_1, state.fastq_2 ] : [ state.fastq_1 ]
|
def outputState = state.paired ? [output_1: state.qc_output1, output_2: state.qc_output2] : [output_1: state.qc_output1, output_2: state.remove(state.qc_output2)]
|
||||||
[
|
[input_1: state.fastq_1, input_2: state.fastq_2] + outputState
|
||||||
input: input,
|
|
||||||
extra_args: state.extra_fq_subsample_args
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
|
args: [
|
||||||
|
record_count: 1000,
|
||||||
|
seed: 1
|
||||||
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"subsampled_fastq_1": "output_1",
|
"subsampled_fastq_1": "output_1",
|
||||||
"subsampled_fastq_2": "output_2"
|
"subsampled_fastq_2": "output_2"
|
||||||
@@ -187,6 +222,7 @@ workflow run_wf {
|
|||||||
)
|
)
|
||||||
[ id, state + [lib_type: lib_type] ]
|
[ id, state + [lib_type: lib_type] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
| salmon_quant.run (
|
| salmon_quant.run (
|
||||||
runIf: { id, state -> state.strandedness == 'auto' },
|
runIf: { id, state -> state.strandedness == 'auto' },
|
||||||
fromState: { id, state ->
|
fromState: { id, state ->
|
||||||
@@ -204,17 +240,17 @@ workflow run_wf {
|
|||||||
toState: [ "salmon_quant_output": "output" ]
|
toState: [ "salmon_quant_output": "output" ]
|
||||||
)
|
)
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = (!state.paired) ?
|
def mod_state = (!state.paired) ?
|
||||||
[trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] :
|
[trim_log_2: state.remove(state.trim_log_2), trim_zip_2: state.remove(state.trim_zip_2), trim_html_2: state.remove(state.trim_html_2), failed_trim_unpaired2: state.remove(state.failed_trim_unpaired2)] :
|
||||||
[]
|
[]
|
||||||
[ id, state + mod_state ]
|
[ id, state + mod_state ]
|
||||||
}
|
}
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
||||||
[ id, mod_state ]
|
[ id, mod_state ]
|
||||||
}
|
}
|
||||||
|
|
||||||
| setState (
|
| setState (
|
||||||
"fastqc_html_1": "fastqc_html_1",
|
"fastqc_html_1": "fastqc_html_1",
|
||||||
@@ -230,9 +266,6 @@ workflow run_wf {
|
|||||||
"trim_html_1": "trim_html_1",
|
"trim_html_1": "trim_html_1",
|
||||||
"trim_html_2": "trim_html_2",
|
"trim_html_2": "trim_html_2",
|
||||||
"sortmerna_log": "sortmerna_log",
|
"sortmerna_log": "sortmerna_log",
|
||||||
"failed_trim": "failed_trim",
|
|
||||||
"failed_trim_unpaired1": "failed_trim_unpaired1",
|
|
||||||
"failed_trim_unpaired2": "failed_trim_unpaired2",
|
|
||||||
"trim_json": "trim_json",
|
"trim_json": "trim_json",
|
||||||
"trim_html": "trim_html",
|
"trim_html": "trim_html",
|
||||||
"trim_merged_out": "trim_merged_out",
|
"trim_merged_out": "trim_merged_out",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
viash ns build --parallel --setup cb
|
viash ns build --parallel --setup cb #-q pre_processing
|
||||||
|
|
||||||
echo "> Preparing reference data files"
|
echo "> Preparing reference data files"
|
||||||
gunzip --keep testData/minimal_test/reference/genes.gtf.gz
|
gunzip --keep testData/minimal_test/reference/genes.gtf.gz
|
||||||
@@ -24,10 +24,11 @@ nextflow run target/nextflow/workflows/pre_processing/main.nf \
|
|||||||
--salmon_index testData/minimal_test/reference/salmon_index \
|
--salmon_index testData/minimal_test/reference/salmon_index \
|
||||||
--skip_trimming false \
|
--skip_trimming false \
|
||||||
--trimmer trimgalore \
|
--trimmer trimgalore \
|
||||||
--remove_ribo_rna false \
|
--remove_ribo_rna true \
|
||||||
--ribo_database_manifest src/assets/rrna-db-defaults.txt \
|
--ribo_database_manifest testData/minimal_test/reference/rrna-db-defaults.txt \
|
||||||
--skip_bbsplit true \
|
--skip_bbsplit true \
|
||||||
--bbsplit_index test_results/prepare_genome_test1/BBSplit_index \
|
--bbsplit_index test_results/prepare_genome_test1/BBSplit_index \
|
||||||
|
--with_umi false \
|
||||||
-profile docker \
|
-profile docker \
|
||||||
-resume
|
-resume
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,8 @@ argument_groups:
|
|||||||
description: Skip BBSplit for removal of non-reference genome reads.
|
description: Skip BBSplit for removal of non-reference genome reads.
|
||||||
- name: "--bbsplit_fasta_list"
|
- name: "--bbsplit_fasta_list"
|
||||||
type: file
|
type: file
|
||||||
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
|
description: List of reference genomes (separated by ";") to filter reads against with BBSplit.
|
||||||
|
multiple: true
|
||||||
- name: "--star_index"
|
- name: "--star_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built STAR index.
|
description: Path to directory or tar.gz archive for pre-built STAR index.
|
||||||
@@ -45,18 +46,12 @@ argument_groups:
|
|||||||
- name: "--rsem_index"
|
- name: "--rsem_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built RSEM index.
|
description: Path to directory or tar.gz archive for pre-built RSEM index.
|
||||||
- name: extra_rsem_prepare_reference_args
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to rsem-prepare-reference command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--salmon_index"
|
- name: "--salmon_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built Salmon index.
|
description: Path to directory or tar.gz archive for pre-built Salmon index.
|
||||||
- name: "--kallisto_index"
|
- name: "--kallisto_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built Kallisto index.
|
description: Path to directory or tar.gz archive for pre-built Kallisto index.
|
||||||
# - name: "--hisat2_index"
|
|
||||||
# type: file
|
|
||||||
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
|
|
||||||
- name: "--bbsplit_index"
|
- name: "--bbsplit_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built BBSplit index.
|
description: Path to directory or tar.gz archive for pre-built BBSplit index.
|
||||||
@@ -125,10 +120,6 @@ argument_groups:
|
|||||||
direction: output
|
direction: output
|
||||||
description: Path to Kallisto index.
|
description: Path to Kallisto index.
|
||||||
default: Kallisto_index
|
default: Kallisto_index
|
||||||
# - name: "--hisat2_index_uncompressed"
|
|
||||||
# type: file
|
|
||||||
# direction: output
|
|
||||||
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
|
|
||||||
- name: "--bbsplit_index_uncompressed"
|
- name: "--bbsplit_index_uncompressed"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
@@ -165,11 +156,12 @@ dependencies:
|
|||||||
repository: craftbox
|
repository: craftbox
|
||||||
- name: star/star_genome_generate
|
- name: star/star_genome_generate
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: bbmap_bbsplit
|
- name: bbmap/bbmap_bbsplit
|
||||||
|
repository: biobox
|
||||||
- name: salmon/salmon_index
|
- name: salmon/salmon_index
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: kallisto/kallisto_index
|
- name: kallisto/kallisto_index
|
||||||
# repository: biobox
|
repository: biobox
|
||||||
|
|
||||||
runners:
|
runners:
|
||||||
- type: executable
|
- type: executable
|
||||||
|
|||||||
@@ -138,43 +138,45 @@ workflow run_wf {
|
|||||||
[ id, state + [transcript_fasta: transcript_fasta] ]
|
[ id, state + [transcript_fasta: transcript_fasta] ]
|
||||||
}
|
}
|
||||||
|
|
||||||
// chromosome size and fai index
|
// chromosome size and fai index
|
||||||
| getchromsizes.run (
|
| getchromsizes.run (
|
||||||
fromState: [ "fasta": "fasta" ],
|
fromState: [ "fasta": "fasta" ],
|
||||||
toState: [
|
toState: [
|
||||||
"fai": "fai",
|
"fai": "fai",
|
||||||
"sizes": "sizes"
|
"sizes": "sizes"
|
||||||
],
|
],
|
||||||
key: "chromsizes",
|
key: "chromsizes",
|
||||||
args: [
|
args: [
|
||||||
fai: "genome_additional.fasta.fai",
|
fai: "genome_additional.fasta.fai",
|
||||||
sizes: "genome_additional.fasta.sizes"
|
sizes: "genome_additional.fasta.sizes"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
// untar bbsplit index, if available
|
// untar bbsplit index, if available
|
||||||
| untar.run (
|
| untar.run (
|
||||||
runIf: {id, state -> state.bbsplit_index},
|
runIf: {id, state -> state.bbsplit_index},
|
||||||
fromState: [ "input": "bbsplit_index" ],
|
fromState: [ "input": "bbsplit_index" ],
|
||||||
toState: [ "bbsplit_index": "output" ],
|
toState: [ "bbsplit_index": "output" ],
|
||||||
key: "untar_bbsplit_index",
|
key: "untar_bbsplit_index",
|
||||||
args: [output: "BBSplit_index"]
|
args: [output: "BBSplit_index"]
|
||||||
)
|
)
|
||||||
|
|
||||||
// create bbsplit index, if not already availble
|
| map {id, state ->
|
||||||
| bbmap_bbsplit.run (
|
def ref = [state.fasta] + state.bbsplit_fasta_list
|
||||||
runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index},
|
[id, state + [bbsplit_ref: ref] ]
|
||||||
fromState: [
|
}
|
||||||
"primary_ref": "fasta",
|
|
||||||
"bbsplit_fasta_list": "bbsplit_fasta_list"
|
// create bbsplit index, if not already availble
|
||||||
],
|
| bbmap_bbsplit.run (
|
||||||
toState: [ "bbsplit_index": "bbsplit_index" ],
|
runIf: {id, state -> !state.skip_bbsplit && !state.bbsplit_index},
|
||||||
args: [
|
fromState: ["ref": "bbsplit_ref"],
|
||||||
only_build_index: true,
|
toState: [ "bbsplit_index": "index" ],
|
||||||
bbsplit_index: "BBSplit_index"
|
args: [
|
||||||
],
|
only_build_index: true,
|
||||||
key: "generate_bbsplit_index"
|
index: "BBSplit_index"
|
||||||
)
|
],
|
||||||
|
key: "generate_bbsplit_index"
|
||||||
|
)
|
||||||
|
|
||||||
// Uncompress STAR index or generate from scratch if required
|
// Uncompress STAR index or generate from scratch if required
|
||||||
| untar.run (
|
| untar.run (
|
||||||
@@ -251,16 +253,16 @@ workflow run_wf {
|
|||||||
args: [output: "Kallisto_index"]
|
args: [output: "Kallisto_index"]
|
||||||
)
|
)
|
||||||
|
|
||||||
| kallisto_index.run(
|
| kallisto_index.run(
|
||||||
runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index},
|
runIf: {id, state -> state.pseudo_aligner == "kallisto" && !state.kallisto_index},
|
||||||
fromState: [
|
fromState: [
|
||||||
"transcriptome_fasta": "transcript_fasta",
|
"input": "transcript_fasta",
|
||||||
"pseudo_aligner_kmer_size": "pseudo_aligner_kmer_size"
|
"kmer_size": "pseudo_aligner_kmer_size"
|
||||||
],
|
],
|
||||||
toState: [ "kallisto_index": "kallisto_index" ],
|
toState: [ "kallisto_index": "index" ],
|
||||||
key: "generate_kallisto_index",
|
key: "generate_kallisto_index",
|
||||||
args: [kallisto_index: "Kallisto_index"]
|
args: [index: "Kallisto_index"]
|
||||||
)
|
)
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# viash ns build --setup cb --parallel -q prepare_genome
|
viash ns build --setup cb --parallel
|
||||||
|
|
||||||
# echo "Test 1: Annotation file format - GTF"
|
# echo "Test 1: Annotation file format - GTF"
|
||||||
# nextflow run target/nextflow/workflows/prepare_genome/main.nf \
|
# nextflow run target/nextflow/workflows/prepare_genome/main.nf \
|
||||||
@@ -12,7 +12,7 @@
|
|||||||
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
# --genotype false \
|
# --genotype false \
|
||||||
# --biotype gene_biotype \
|
# --biotype gene_biotype \
|
||||||
# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
|
# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
|
||||||
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
|
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
|
||||||
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
|
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
|
||||||
# -profile docker \
|
# -profile docker \
|
||||||
@@ -28,7 +28,7 @@
|
|||||||
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
# --genotype false \
|
# --genotype false \
|
||||||
# --biotype gene_biotype \
|
# --biotype gene_biotype \
|
||||||
# --bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
|
# --bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
|
||||||
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
|
# --salmon_index testData/minimal_test/reference/salmon.tar.gz \
|
||||||
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
|
# --rsem_index testData/minimal_test/reference/rsem.tar.gz \
|
||||||
# -profile docker \
|
# -profile docker \
|
||||||
@@ -43,7 +43,7 @@ nextflow run target/nextflow/workflows/prepare_genome/main.nf \
|
|||||||
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
|
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
|
||||||
--genotype false \
|
--genotype false \
|
||||||
--biotype gene_biotype \
|
--biotype gene_biotype \
|
||||||
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
|
--bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
|
||||||
--pseudo_aligner kallisto \
|
--pseudo_aligner kallisto \
|
||||||
--aligner star_rsem \
|
--aligner star_rsem \
|
||||||
-profile docker \
|
-profile docker \
|
||||||
|
|||||||
@@ -4,73 +4,73 @@ description: |
|
|||||||
A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.
|
A viash sub-workflow for pseudo alignment and quantification stage of nf-core/rnaseq pipeline.
|
||||||
|
|
||||||
argument_groups:
|
argument_groups:
|
||||||
- name: "Input"
|
- name: "Input"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--id"
|
- name: "--id"
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
description: ID of the sample.
|
description: ID of the sample.
|
||||||
example: foo
|
example: foo
|
||||||
- name: "--fastq_1"
|
- name: "--fastq_1"
|
||||||
alternatives: [-i]
|
alternatives: [-i]
|
||||||
type: file
|
type: file
|
||||||
description: Path to the sample (or read 1 of paired end sample).
|
description: Path to the sample (or read 1 of paired end sample).
|
||||||
required: true
|
required: true
|
||||||
example: input.fastq.gz
|
example: input.fastq.gz
|
||||||
- name: "--fastq_2"
|
- name: "--fastq_2"
|
||||||
type: file
|
type: file
|
||||||
required: false
|
required: false
|
||||||
description: Path to read 2 of the sample.
|
description: Path to read 2 of the sample.
|
||||||
- name: "--strandedness"
|
- name: "--strandedness"
|
||||||
type: string
|
type: string
|
||||||
required: false
|
required: false
|
||||||
description: Sample strand-specificity. Must be one of unstranded, forward, or reverse
|
description: Sample strand-specificity. Must be one of unstranded, forward, or reverse
|
||||||
choices: [forward, reverse, unstranded]
|
choices: [forward, reverse, unstranded]
|
||||||
- name: "--gtf"
|
- name: "--gtf"
|
||||||
type: file
|
type: file
|
||||||
description: GTF file
|
description: GTF file
|
||||||
- name: "--transcript_fasta"
|
- name: "--transcript_fasta"
|
||||||
type: file
|
type: file
|
||||||
description: Fasta file of the reference transcriptome.
|
description: Fasta file of the reference transcriptome.
|
||||||
- name: "--pseudo_aligner"
|
- name: "--pseudo_aligner"
|
||||||
type: string
|
type: string
|
||||||
default: false
|
default: false
|
||||||
description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.
|
description: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'.
|
||||||
choices: [salmon, kallisto]
|
choices: [salmon, kallisto]
|
||||||
- name: "--salmon_index"
|
- name: "--salmon_index"
|
||||||
type: file
|
type: file
|
||||||
description: Salmon index
|
description: Salmon index
|
||||||
- name: "--kallisto_index"
|
- name: "--kallisto_index"
|
||||||
type: file
|
type: file
|
||||||
description: Kallisto index
|
description: Kallisto index
|
||||||
- name: "--lib_type"
|
- name: "--lib_type"
|
||||||
type: string
|
type: string
|
||||||
description: Override library type inferred based on strandedness defined in meta object
|
description: Override library type inferred based on strandedness defined in meta object
|
||||||
default: ''
|
default: ''
|
||||||
- name: "--kallisto_quant_fragment_length"
|
- name: "--kallisto_quant_fragment_length"
|
||||||
type: integer
|
type: double
|
||||||
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
|
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
|
||||||
- name: "--kallisto_quant_fragment_length_sd"
|
- name: "--kallisto_quant_fragment_length_sd"
|
||||||
type: integer
|
type: double
|
||||||
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
|
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
|
||||||
|
|
||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--pseudo_multiqc"
|
- name: "--pseudo_multiqc"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
- name: "--quant_out_dir"
|
- name: "--quant_out_dir"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.quant
|
default: $id.quant
|
||||||
- name: "--salmon_quant_results_file"
|
- name: "--salmon_quant_results_file"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.quant.sf
|
default: $id.quant.sf
|
||||||
- name: "--kallisto_quant_results_file"
|
- name: "--kallisto_quant_results_file"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
default: $id.abundance.tsv
|
default: $id.abundance.tsv
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
- type: nextflow_script
|
- type: nextflow_script
|
||||||
@@ -81,6 +81,7 @@ dependencies:
|
|||||||
- name: salmon/salmon_quant
|
- name: salmon/salmon_quant
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: kallisto/kallisto_quant
|
- name: kallisto/kallisto_quant
|
||||||
|
repository: biobox
|
||||||
|
|
||||||
runners:
|
runners:
|
||||||
- type: executable
|
- type: executable
|
||||||
|
|||||||
@@ -57,22 +57,32 @@ workflow run_wf {
|
|||||||
[ id, mod_state ]
|
[ id, mod_state ]
|
||||||
}
|
}
|
||||||
|
|
||||||
| kallisto_quant.run (
|
| kallisto_quant.run (
|
||||||
runIf: { id, state -> state.pseudo_aligner == 'kallisto'},
|
runIf: { id, state -> state.pseudo_aligner == 'kallisto'},
|
||||||
fromState: [
|
fromState: { id, state ->
|
||||||
"input": "input",
|
def fr_stranded = state.strandedness == 'forward'
|
||||||
"paired": "paired",
|
def rf_stranded = state.strandedness == 'reverse'
|
||||||
"gtf": "gtf",
|
[
|
||||||
"index": "kallisto_index",
|
input: state.input,
|
||||||
"fragment_length": "kallisto_quant_fragment_length",
|
index: state.kallisto_index,
|
||||||
"fragment_length_sd": "kallisto_quant_fragment_length_sd"
|
fragment_length: state.kallisto_quant_fragment_length,
|
||||||
],
|
sd: state.kallisto_quant_fragment_length_sd,
|
||||||
toState: [
|
single: !state.paired,
|
||||||
"quant_out_dir": "output",
|
fr_stranded: fr_stranded,
|
||||||
"kallisto_quant_results_file": "quant_results_file",
|
rf_stranded: rf_stranded,
|
||||||
"pseudo_multiqc": "log"
|
|
||||||
]
|
]
|
||||||
)
|
},
|
||||||
|
args: [log: "kallisto_quant.log"],
|
||||||
|
toState: { id, output_state, state ->
|
||||||
|
def neKeys = [
|
||||||
|
"quant_out_dir": output_state["output_dir"],
|
||||||
|
"kallisto_quant_results_file": output_state["output_dir"] + "/abundance.tsv",
|
||||||
|
"pseudo_multiqc": output_state["log"]
|
||||||
|
]
|
||||||
|
def new_state = state + newKeys
|
||||||
|
return new_state
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
def mod_state = state.findAll { key, value -> value instanceof java.nio.file.Path && value.exists() }
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# viash ns build --setup cb -q pseudo_alignment_and_quant
|
viash ns build --setup cb --parallel #-q pseudo_alignment_and_quant
|
||||||
|
|
||||||
# Split error message from standard output
|
# Split error message from standard output
|
||||||
# viash ns list > /dev/null
|
# viash ns list > /dev/null
|
||||||
@@ -16,30 +16,32 @@ WT_REP1,SRR6357070_1.fastq.gz,SRR6357070_2.fastq.gz,reverse
|
|||||||
RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
|
RAP1_UNINDUCED_REP1,SRR6357073_1.fastq.gz,,reverse
|
||||||
HERE
|
HERE
|
||||||
|
|
||||||
echo "> Test 1: Salmon qunatification"
|
# echo "> Test 1: Salmon qunatification"
|
||||||
nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
|
|
||||||
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
|
|
||||||
--publish_dir "test_results/pseudo_alignment_test1" \
|
|
||||||
--fasta testData/minimal_test/reference/genome.fasta \
|
|
||||||
--gtf testData/minimal_test/reference/genes.gtf.gz \
|
|
||||||
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
|
||||||
--salmon_index testData/minimal_test/reference/salmon_index \
|
|
||||||
--pseudo_aligner salmon \
|
|
||||||
-profile docker \
|
|
||||||
-resume
|
|
||||||
|
|
||||||
# echo "> Test 2: Kallisto qunatification"
|
|
||||||
# nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
|
# nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
|
||||||
# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
|
# --param_list testData/minimal_test/input_fastq/sample_sheet.csv \
|
||||||
# --publish_dir "test_results/pseudo_alignment_test2" \
|
# --publish_dir "test_results/pseudo_alignment_test1" \
|
||||||
# --fasta testData/minimal_test/reference/genome.fasta \
|
# --fasta testData/minimal_test/reference/genome.fasta \
|
||||||
# --gtf testData/minimal_test/reference/genes.gtf.gz \
|
# --gtf testData/minimal_test/reference/genes.gtf.gz \
|
||||||
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
# --transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
# --kallisto_index test_results/prepare_genome_test3/Kallisto_index \
|
# --salmon_index testData/minimal_test/reference/salmon_index \
|
||||||
# --pseudo_aligner kallisto \
|
# --pseudo_aligner salmon \
|
||||||
# -profile docker \
|
# -profile docker \
|
||||||
# -resume
|
# -resume
|
||||||
|
|
||||||
|
echo "> Test 2: Kallisto qunatification"
|
||||||
|
nextflow run target/nextflow/workflows/pseudo_alignment_and_quant/main.nf \
|
||||||
|
--param_list testData/minimal_test/input_fastq/sample_sheet.csv \
|
||||||
|
--publish_dir "test_results/pseudo_alignment_test2" \
|
||||||
|
--fasta testData/minimal_test/reference/genome.fasta \
|
||||||
|
--gtf testData/minimal_test/reference/genes.gtf.gz \
|
||||||
|
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
|
--kallisto_index test_results/prepare_genome_test3/Kallisto_index \
|
||||||
|
--pseudo_aligner kallisto \
|
||||||
|
--kallisto_quant_fragment_length 101.0 \
|
||||||
|
--kallisto_quant_fragment_length_sd 50.0 \
|
||||||
|
-profile docker \
|
||||||
|
-resume
|
||||||
|
|
||||||
echo "Removing reference data files"
|
echo "Removing reference data files"
|
||||||
rm testData/minimal_test/reference/genes.gtf
|
rm testData/minimal_test/reference/genes.gtf
|
||||||
rm -r testData/minimal_test/reference/salmon_index
|
rm -r testData/minimal_test/reference/salmon_index
|
||||||
|
|||||||
@@ -112,9 +112,6 @@ argument_groups:
|
|||||||
- name: "--biotype"
|
- name: "--biotype"
|
||||||
type: string
|
type: string
|
||||||
description: Biotype value to use while appending entries to GTF file when additional fasta file is provided.
|
description: Biotype value to use while appending entries to GTF file when additional fasta file is provided.
|
||||||
- name: "--extra_featurecounts_args"
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to featureCounts command in addition to defaults defined by the pipeline
|
|
||||||
|
|
||||||
# RSeQC
|
# RSeQC
|
||||||
- name: "--rseqc_modules"
|
- name: "--rseqc_modules"
|
||||||
@@ -207,12 +204,6 @@ argument_groups:
|
|||||||
description: Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads.
|
description: Set flag to subtract background noise (estimated from intronic reads) to determine tin. Only use this option if there are substantial intronic reads.
|
||||||
|
|
||||||
# Qualimap
|
# Qualimap
|
||||||
- name: "--output_format"
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
default: html
|
|
||||||
choices: [ html, pdf ]
|
|
||||||
description: Format of the qualimap output report (PDF or HTML, default is HTML)
|
|
||||||
- name: "--pr_bases"
|
- name: "--pr_bases"
|
||||||
type: integer
|
type: integer
|
||||||
required: false
|
required: false
|
||||||
@@ -291,8 +282,6 @@ argument_groups:
|
|||||||
- name: "--star_multiqc"
|
- name: "--star_multiqc"
|
||||||
type: file
|
type: file
|
||||||
must_exist: false
|
must_exist: false
|
||||||
# - name: "--hisat2_multiqc"
|
|
||||||
# type: file
|
|
||||||
- name: "--rsem_multiqc"
|
- name: "--rsem_multiqc"
|
||||||
type: file
|
type: file
|
||||||
- name: "--genome_bam_stats"
|
- name: "--genome_bam_stats"
|
||||||
@@ -503,17 +492,20 @@ argument_groups:
|
|||||||
default: $id.intercept_slope.txt
|
default: $id.intercept_slope.txt
|
||||||
|
|
||||||
# Qualimap
|
# Qualimap
|
||||||
- name: "--qualimap_output_pdf"
|
- name: "--qualimap_qc_report"
|
||||||
|
direction: output
|
||||||
|
type: file
|
||||||
|
example: $id.rnaseq_qc_results.txt
|
||||||
|
description: Text file containing the RNAseq QC results.
|
||||||
|
- name: "--qualimap_counts"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
description: Output file for computed counts.
|
||||||
must_exist: false
|
- name: "--qualimap_report"
|
||||||
default: $id.qualimap_output.pdf
|
|
||||||
- name: "--qualimap_output_dir"
|
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
example: $id.report.html
|
||||||
default: $id.qualimap_output
|
description: Report output file. Supported formats are PDF or HTML.
|
||||||
|
|
||||||
# DESeq2
|
# DESeq2
|
||||||
- name: "--deseq2_output"
|
- name: "--deseq2_output"
|
||||||
@@ -626,17 +618,19 @@ resources:
|
|||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: rseqc/rseqc_bamstat
|
- name: rseqc/rseqc_bamstat
|
||||||
|
repository: biobox
|
||||||
- name: rseqc/rseqc_inferexperiment
|
- name: rseqc/rseqc_inferexperiment
|
||||||
- name: rseqc/rseqc_innerdistance
|
repository: biobox
|
||||||
|
- name: rseqc/rseqc_inner_distance
|
||||||
|
repository: biobox
|
||||||
- name: rseqc/rseqc_junctionannotation
|
- name: rseqc/rseqc_junctionannotation
|
||||||
- name: rseqc/rseqc_junctionsaturation
|
- name: rseqc/rseqc_junctionsaturation
|
||||||
- name: rseqc/rseqc_readdistribution
|
- name: rseqc/rseqc_readdistribution
|
||||||
- name: rseqc/rseqc_readduplication
|
- name: rseqc/rseqc_readduplication
|
||||||
- name: rseqc/rseqc_tin
|
- name: rseqc/rseqc_tin
|
||||||
- name: dupradar
|
- name: dupradar
|
||||||
- name: qualimap
|
- name: qualimap/qualimap_rnaseq
|
||||||
# - name: qualimap/qualimap_rnaseq
|
repository: biobox
|
||||||
# repository: biobox
|
|
||||||
- name: preseq_lcextrap
|
- name: preseq_lcextrap
|
||||||
- name: featurecounts
|
- name: featurecounts
|
||||||
repository: biobox
|
repository: biobox
|
||||||
@@ -645,7 +639,7 @@ dependencies:
|
|||||||
- name: prepare_multiqc_input
|
- name: prepare_multiqc_input
|
||||||
- name: multiqc
|
- name: multiqc
|
||||||
repository: biobox
|
repository: biobox
|
||||||
- name: rsem/rsem_merge_counts
|
- name: rsem_merge_counts
|
||||||
- name: workflows/merge_quant_results
|
- name: workflows/merge_quant_results
|
||||||
|
|
||||||
runners:
|
runners:
|
||||||
|
|||||||
@@ -39,145 +39,145 @@ workflow run_wf {
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
| multiqc_custom_biotype.run (
|
| multiqc_custom_biotype.run (
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_biotype_qc && state.biotype && state.featurecounts && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"id": "id",
|
"id": "id",
|
||||||
"biocounts": "featurecounts",
|
"biocounts": "featurecounts",
|
||||||
"biotypes_header": "biotypes_header"
|
"biotypes_header": "biotypes_header"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"featurecounts_multiqc": "featurecounts_multiqc",
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
||||||
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc"
|
"featurecounts_rrna_multiqc": "featurecounts_rrna_multiqc"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
| preseq_lcextrap.run (
|
| preseq_lcextrap.run (
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_preseq && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"paired": "paired",
|
"paired": "paired",
|
||||||
"input": "genome_bam",
|
"input": "genome_bam",
|
||||||
"extra_preseq_args": "extra_preseq_args"
|
"extra_preseq_args": "extra_preseq_args"
|
||||||
],
|
],
|
||||||
toState: [ "preseq_output": "output" ]
|
toState: [ "preseq_output": "output" ]
|
||||||
)
|
)
|
||||||
|
|
||||||
| rseqc_bamstat.run (
|
| rseqc_bamstat.run (
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "bam_stat" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input_file": "genome_bam",
|
||||||
"map_qual": "map_qual"
|
"mapq": "map_qual"
|
||||||
],
|
],
|
||||||
toState: [ "bamstat_output": "output" ]
|
toState: [ "bamstat_output": "output" ]
|
||||||
)
|
)
|
||||||
| rseqc_inferexperiment.run(
|
| rseqc_inferexperiment.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "infer_experiment" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input_file": "genome_bam",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
"sample_size": "sample_size",
|
"sample_size": "sample_size",
|
||||||
"map_qual": "map_qual"
|
"mapq": "map_qual"
|
||||||
],
|
],
|
||||||
toState: [ "strandedness_output": "output" ]
|
toState: [ "strandedness_output": "output" ]
|
||||||
)
|
)
|
||||||
// Get predicted strandedness from the RSeQC infer_experiment.py output
|
// Get predicted strandedness from the RSeQC infer_experiment.py output
|
||||||
| map { id, state ->
|
| map { id, state ->
|
||||||
def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30)
|
def inferred_strand = getInferexperimentStrandedness(state.strandedness_output, 30)
|
||||||
def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true
|
def passed_strand_check = (state.strandedness != inferred_strand[0]) ? false : true
|
||||||
[ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ]
|
[ id, state + [ inferred_strand: inferred_strand, passed_strand_check: passed_strand_check ] ]
|
||||||
}
|
}
|
||||||
| rseqc_innerdistance.run(
|
| rseqc_inner_distance.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && state.paired && "inner_distance" in state.rseqc_modules && !state.skip_align },
|
||||||
key: "inner_distance",
|
key: "inner_distance",
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input_file": "genome_bam",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
"sample_size": "sample_size",
|
"sample_size": "sample_size",
|
||||||
"map_qual": "map_qual",
|
"mapq": "map_qual",
|
||||||
"lower_bound_size": "lower_bound_size",
|
"lower_bound": "lower_bound_size",
|
||||||
"upper_bound_size": "upper_bound_size",
|
"upper_bound": "upper_bound_size",
|
||||||
"step_size": "step_size"
|
"step": "step_size"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"inner_dist_output_stats": "output_stats",
|
"inner_dist_output_stats": "output_stats",
|
||||||
"inner_dist_output_dist": "output_dist",
|
"inner_dist_output_dist": "output_dist",
|
||||||
"inner_dist_output_freq": "output_freq",
|
"inner_dist_output_freq": "output_freq",
|
||||||
"inner_dist_output_plot": "output_plot",
|
"inner_dist_output_plot": "output_plot",
|
||||||
"inner_dist_output_plot_r": "output_plot_r"
|
"inner_dist_output_plot_r": "output_plot_r"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
| rseqc_junctionannotation.run(
|
| rseqc_junctionannotation.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_annotation" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input": "genome_bam",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
"map_qual": "map_qual",
|
"map_qual": "map_qual",
|
||||||
"min_intron": "min_intron"
|
"min_intron": "min_intron"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"junction_annotation_output_log": "output_log",
|
"junction_annotation_output_log": "output_log",
|
||||||
"junction_annotation_output_plot_r": "output_plot_r",
|
"junction_annotation_output_plot_r": "output_plot_r",
|
||||||
"junction_annotation_output_junction_bed": "output_junction_bed",
|
"junction_annotation_output_junction_bed": "output_junction_bed",
|
||||||
"junction_annotation_output_junction_interact": "output_junction_interact",
|
"junction_annotation_output_junction_interact": "output_junction_interact",
|
||||||
"junction_annotation_output_junction_sheet": "output_junction_sheet",
|
"junction_annotation_output_junction_sheet": "output_junction_sheet",
|
||||||
"junction_annotation_output_splice_events_plot": "output_splice_events_plot",
|
"junction_annotation_output_splice_events_plot": "output_splice_events_plot",
|
||||||
"junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot"
|
"junction_annotation_output_splice_junctions_plot": "output_splice_junctions_plot"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
| rseqc_junctionsaturation.run(
|
| rseqc_junctionsaturation.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "junction_saturation" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input": "genome_bam",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
"sampling_percentile_lower_bound": "sampling_percentile_lower_bound",
|
"sampling_percentile_lower_bound": "sampling_percentile_lower_bound",
|
||||||
"sampling_percentile_upper_bound": "sampling_percentile_upper_bound",
|
"sampling_percentile_upper_bound": "sampling_percentile_upper_bound",
|
||||||
"sampling_percentile_step": "sampling_percentile_step",
|
"sampling_percentile_step": "sampling_percentile_step",
|
||||||
"min_intron": "min_intron",
|
"min_intron": "min_intron",
|
||||||
"min_splice_read": "min_splice_read",
|
"min_splice_read": "min_splice_read",
|
||||||
"map_qual": "map_qual"
|
"map_qual": "map_qual"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"junction_saturation_output_plot_r": "output_plot_r",
|
"junction_saturation_output_plot_r": "output_plot_r",
|
||||||
"junction_saturation_output_plot": "output_plot"
|
"junction_saturation_output_plot": "output_plot"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
| rseqc_readdistribution.run(
|
| rseqc_readdistribution.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_distribution" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input": "genome_bam",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
],
|
],
|
||||||
toState: [ "read_distribution_output": "output" ]
|
toState: [ "read_distribution_output": "output" ]
|
||||||
)
|
)
|
||||||
| rseqc_readduplication.run(
|
| rseqc_readduplication.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "read_duplication" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"input": "genome_bam",
|
"input": "genome_bam",
|
||||||
"read_count_upper_limit": "read_count_upper_limit",
|
"read_count_upper_limit": "read_count_upper_limit",
|
||||||
"map_qual": "map_qual"
|
"map_qual": "map_qual"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r",
|
"read_duplication_output_duplication_rate_plot_r": "output_duplication_rate_plot_r",
|
||||||
"read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot",
|
"read_duplication_output_duplication_rate_plot": "output_duplication_rate_plot",
|
||||||
"read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping",
|
"read_duplication_output_duplication_rate_mapping": "output_duplication_rate_mapping",
|
||||||
"read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence"
|
"read_duplication_output_duplication_rate_sequence": "output_duplication_rate_sequence"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
| rseqc_tin.run(
|
| rseqc_tin.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_rseqc && "tin" in state.rseqc_modules && !state.skip_align },
|
||||||
fromState: [
|
fromState: [
|
||||||
"bam_input": "genome_bam",
|
"bam_input": "genome_bam",
|
||||||
"bai_input": "genome_bam_index",
|
"bai_input": "genome_bam_index",
|
||||||
"refgene": "gene_bed",
|
"refgene": "gene_bed",
|
||||||
"minimum_coverage": "minimum_coverage",
|
"minimum_coverage": "minimum_coverage",
|
||||||
"sample_size": "tin_sample_size",
|
"sample_size": "tin_sample_size",
|
||||||
"subtract_background": "subtract_background"
|
"subtract_background": "subtract_background"
|
||||||
],
|
],
|
||||||
toState: [
|
toState: [
|
||||||
"tin_output_summary": "output_tin_summary",
|
"tin_output_summary": "output_tin_summary",
|
||||||
"tin_output_metrics": "output_tin"
|
"tin_output_metrics": "output_tin"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
| dupradar.run(
|
| dupradar.run(
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align },
|
runIf: { id, state -> !state.skip_qc && !state.skip_dupradar && !state.skip_align },
|
||||||
@@ -199,23 +199,25 @@ workflow run_wf {
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
| qualimap.run(
|
// TODO: Add outdir as an output argument to the qualimap module on biobox.
|
||||||
runIf: { id, state -> !state.skip_qc && !state.skip_qualimap && !state.skip_align },
|
// Qualimap ouputs a few more raw data files to outdir but since the module is using a temporary directory as output dir these files are lost.
|
||||||
fromState: [
|
| qualimap_rnaseq.run(
|
||||||
"input": "genome_bam",
|
fromState: [
|
||||||
"gtf": "gtf",
|
"bam": "genome_bam",
|
||||||
"pr_bases": "pr_bases",
|
"gtf": "gtf",
|
||||||
"tr_bias": "tr_bias",
|
"num_pr_bases": "pr_bases",
|
||||||
"algorithm": "algorithm",
|
"num_tr_bias": "tr_bias",
|
||||||
"sequencing_protocol": "sequencing_protocol",
|
"algorithm": "algorithm",
|
||||||
"sorted": "sorted",
|
"sequencing_protocol": "sequencing_protocol",
|
||||||
"java_memory_size": "java_memory_size",
|
"sorted": "sorted",
|
||||||
],
|
"java_memory_size": "java_memory_size",
|
||||||
toState: [
|
],
|
||||||
"qualimap_output_pdf": "output_pdf",
|
toState: [
|
||||||
"qualimap_output_dir": "output_dir"
|
"qualimap_report": "report",
|
||||||
]
|
"qualimap_qc_report": "qc_report",
|
||||||
)
|
"qualimap_counts": "counts"
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
merged_ch = qc_ch
|
merged_ch = qc_ch
|
||||||
| toSortedList
|
| toSortedList
|
||||||
@@ -338,10 +340,10 @@ workflow run_wf {
|
|||||||
(state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ?
|
(state.preseq_output instanceof java.nio.file.Path && state.preseq_output.exists()) ?
|
||||||
state.preseq_output :
|
state.preseq_output :
|
||||||
null }
|
null }
|
||||||
def qualimap_output_dir = list.collect { id, state ->
|
// def qualimap_output_dir = list.collect { id, state ->
|
||||||
(state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ?
|
// (state.qualimap_output_dir instanceof java.nio.file.Path && state.qualimap_output_dir.exists()) ?
|
||||||
state.qualimap_output_dir :
|
// state.qualimap_output_dir :
|
||||||
null }
|
// null }
|
||||||
def dupradar_output_dup_intercept_mqc = list.collect { id, state ->
|
def dupradar_output_dup_intercept_mqc = list.collect { id, state ->
|
||||||
(state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ?
|
(state.dupradar_output_dup_intercept_mqc instanceof java.nio.file.Path && state.dupradar_output_dup_intercept_mqc.exists()) ?
|
||||||
state.dupradar_output_dup_intercept_mqc :
|
state.dupradar_output_dup_intercept_mqc :
|
||||||
@@ -426,7 +428,7 @@ workflow run_wf {
|
|||||||
featurecounts_multiqc: featurecounts_multiqc,
|
featurecounts_multiqc: featurecounts_multiqc,
|
||||||
featurecounts_rrna_multiqc: featurecounts_rrna_multiqc,
|
featurecounts_rrna_multiqc: featurecounts_rrna_multiqc,
|
||||||
preseq_output: preseq_output,
|
preseq_output: preseq_output,
|
||||||
qualimap_output_dir: qualimap_output_dir,
|
// qualimap_output_dir: qualimap_output_dir,
|
||||||
dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc,
|
dupradar_output_dup_intercept_mqc: dupradar_output_dup_intercept_mqc,
|
||||||
dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc,
|
dupradar_output_duprate_exp_denscurve_mqc: dupradar_output_duprate_exp_denscurve_mqc,
|
||||||
bamstat_output: bamstat_output,
|
bamstat_output: bamstat_output,
|
||||||
@@ -605,7 +607,7 @@ workflow run_wf {
|
|||||||
"pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo",
|
"pseudo_aligner_pca_multiqc": "deseq2_pca_multiqc_pseudo",
|
||||||
"pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo",
|
"pseudo_aligner_clustering_multiqc": "deseq2_dists_multiqc_pseudo",
|
||||||
"preseq_multiqc": "preseq_output",
|
"preseq_multiqc": "preseq_output",
|
||||||
"qualimap_multiqc": "qualimap_output_dir",
|
// "qualimap_multiqc": "qualimap_output_dir",
|
||||||
"dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc",
|
"dupradar_output_dup_intercept_mqc": "dupradar_output_dup_intercept_mqc",
|
||||||
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
||||||
"bamstat_multiqc": "bamstat_output",
|
"bamstat_multiqc": "bamstat_output",
|
||||||
@@ -705,8 +707,9 @@ workflow run_wf {
|
|||||||
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
||||||
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
||||||
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
||||||
"qualimap_output_dir": "qualimap_output_dir",
|
"qualimap_report": "qualimap_report",
|
||||||
"qualimap_output_pdf": "qualimap_output_pdf",
|
"qualimap_qc_report": "qualimap_qc_report",
|
||||||
|
"qualimap_counts": "qualimap_counts",
|
||||||
"featurecounts": "featurecounts",
|
"featurecounts": "featurecounts",
|
||||||
"featurecounts_summary": "featurecounts_summary",
|
"featurecounts_summary": "featurecounts_summary",
|
||||||
"featurecounts_multiqc": "featurecounts_multiqc",
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
||||||
|
|||||||
@@ -70,13 +70,6 @@ argument_groups:
|
|||||||
- name: "--kallisto_index"
|
- name: "--kallisto_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built Kallisto index.
|
description: Path to directory or tar.gz archive for pre-built Kallisto index.
|
||||||
# - name: "--hisat2_index"
|
|
||||||
# type: file
|
|
||||||
# description: Path to directory or tar.gz archive for pre-built HISAT2 index.
|
|
||||||
# - name: "--hisat2_build_memory"
|
|
||||||
# type: string
|
|
||||||
# description: Minimum memory required to use splice sites and exons in the HiSAT2 index build process.
|
|
||||||
# default: 200.GB
|
|
||||||
- name: "--gencode"
|
- name: "--gencode"
|
||||||
type: boolean_true
|
type: boolean_true
|
||||||
description: Specify if the GTF annotation is in GENCODE format.
|
description: Specify if the GTF annotation is in GENCODE format.
|
||||||
@@ -107,12 +100,6 @@ argument_groups:
|
|||||||
description: Specify the trimming tool to use.
|
description: Specify the trimming tool to use.
|
||||||
choices: ["trimgalore", "fastp"]
|
choices: ["trimgalore", "fastp"]
|
||||||
default: "trimgalore"
|
default: "trimgalore"
|
||||||
- name: "--extra_trimgalore_args"
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to Trim Galore! command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--extra_fastp_args"
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to fastp command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--min_trimmed_reads"
|
- name: "--min_trimmed_reads"
|
||||||
type: integer
|
type: integer
|
||||||
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
|
description: Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.
|
||||||
@@ -122,7 +109,8 @@ argument_groups:
|
|||||||
arguments:
|
arguments:
|
||||||
- name: "--bbsplit_fasta_list"
|
- name: "--bbsplit_fasta_list"
|
||||||
type: file
|
type: file
|
||||||
description: Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit. To use BBSplit, "--skip_bbsplit" must be explicitly set to "false". The file should contain 2 (comma separated) columns - short name and full path to reference genome(s)
|
description: List of reference genomes (separated by ";") to filter reads against with BBSplit.
|
||||||
|
multiple: true
|
||||||
- name: "--bbsplit_index"
|
- name: "--bbsplit_index"
|
||||||
type: file
|
type: file
|
||||||
description: Path to directory or tar.gz archive for pre-built BBSplit index.
|
description: Path to directory or tar.gz archive for pre-built BBSplit index.
|
||||||
@@ -185,10 +173,10 @@ argument_groups:
|
|||||||
description: Kmer length passed to indexing step of pseudoaligners.
|
description: Kmer length passed to indexing step of pseudoaligners.
|
||||||
default: 31
|
default: 31
|
||||||
- name: "--kallisto_quant_fragment_length"
|
- name: "--kallisto_quant_fragment_length"
|
||||||
type: integer
|
type: double
|
||||||
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
|
description: For single-end mode only, the estimated average fragment length to use for quantification with Kallisto.
|
||||||
- name: "--kallisto_quant_fragment_length_sd"
|
- name: "--kallisto_quant_fragment_length_sd"
|
||||||
type: integer
|
type: double
|
||||||
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
|
description: For single-end mode only, the estimated standard deviation of the fragment length for quantification with Kallisto.
|
||||||
- name: "--bam_csi_index"
|
- name: "--bam_csi_index"
|
||||||
type: boolean_true
|
type: boolean_true
|
||||||
@@ -196,10 +184,6 @@ argument_groups:
|
|||||||
- name: "--salmon_quant_libtype"
|
- name: "--salmon_quant_libtype"
|
||||||
type: string
|
type: string
|
||||||
description: Override Salmon library type inferred based on strandedness defined in meta object.
|
description: Override Salmon library type inferred based on strandedness defined in meta object.
|
||||||
- name: "--extra_salmon_quant_args"
|
|
||||||
type: string
|
|
||||||
default: '-v'
|
|
||||||
description: Extra arguments to pass to salmon quant command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--min_mapped_reads"
|
- name: "--min_mapped_reads"
|
||||||
type: integer
|
type: integer
|
||||||
description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing.
|
description: Minimum percentage of uniquely mapped reads below which samples are removed from further processing.
|
||||||
@@ -223,10 +207,6 @@ argument_groups:
|
|||||||
- name: "--skip_pseudo_alignment"
|
- name: "--skip_pseudo_alignment"
|
||||||
type: boolean_true
|
type: boolean_true
|
||||||
description: Skip all of the pseudo-alignment-based processes within the pipeline.
|
description: Skip all of the pseudo-alignment-based processes within the pipeline.
|
||||||
- name: --extra_rsem_calculate_expression_args
|
|
||||||
type: string
|
|
||||||
description: Extra arguments to pass to rsem-calculate-expression command in addition to defaults defined by the pipeline.
|
|
||||||
default: '--star --star-output-genome-bam --star-gzipped-read-file --estimate-rspd --seed 1'
|
|
||||||
|
|
||||||
- name: Process skipping options
|
- name: Process skipping options
|
||||||
arguments:
|
arguments:
|
||||||
@@ -281,18 +261,10 @@ argument_groups:
|
|||||||
|
|
||||||
- name: Other process arguments
|
- name: Other process arguments
|
||||||
arguments:
|
arguments:
|
||||||
- name: "--extra_fq_subsample_args"
|
|
||||||
type: string
|
|
||||||
default: ' --record-count 1000000 --seed 1'
|
|
||||||
description: Extra arguments to pass to fq subsample command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--extra_picard_args"
|
- name: "--extra_picard_args"
|
||||||
type: string
|
type: string
|
||||||
default: ' --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
|
default: ' --ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
|
||||||
description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline.
|
description: Extra arguments to pass to picard MarkDuplicates command in addition to defaults defined by the pipeline.
|
||||||
- name: "--extra_bedtools_args"
|
|
||||||
type: string
|
|
||||||
default: ' -split -du'
|
|
||||||
description: Extra arguments to pass to bedtools genomecov command in addition to defaults defined by the pipeline.
|
|
||||||
- name: "--extra_preseq_args"
|
- name: "--extra_preseq_args"
|
||||||
type: string
|
type: string
|
||||||
description: Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline
|
description: Extra arguments to pass to preseq lc_extrap command in addition to defaults defined by the pipeline
|
||||||
@@ -367,14 +339,14 @@ argument_groups:
|
|||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
description: Path to output directory
|
description: Path to output directory
|
||||||
default: fastq/$id.read_1.fastq.gz
|
default: fastq/${id}_r1.fastq.gz
|
||||||
- name: "--output_fastq_2"
|
- name: "--output_fastq_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
description: Path to output directory
|
description: Path to output directory
|
||||||
default: fastq/$id.read_2.fastq.gz
|
default: fastq/${id}_r2.fastq.gz
|
||||||
|
|
||||||
# FastQC
|
# FastQC
|
||||||
- name: "--fastqc_html_1"
|
- name: "--fastqc_html_1"
|
||||||
@@ -383,52 +355,52 @@ argument_groups:
|
|||||||
description: FastQC HTML report for read 1.
|
description: FastQC HTML report for read 1.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_raw/$id.read_1.fastqc.html
|
default: fastqc_raw/${id}_r1.fastqc.html
|
||||||
- name: "--fastqc_html_2"
|
- name: "--fastqc_html_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC HTML report for read 2.
|
description: FastQC HTML report for read 2.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_raw/$id.read_2.fastqc.html
|
default: fastqc_raw/${id}_r2.fastqc.html
|
||||||
- name: "--fastqc_zip_1"
|
- name: "--fastqc_zip_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC report archive for read 1.
|
description: FastQC report archive for read 1.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_raw/$id.read_1.fastqc.zip
|
default: fastqc_raw/${id}_r1.fastqc.zip
|
||||||
- name: "--fastqc_zip_2"
|
- name: "--fastqc_zip_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
description: FastQC report archive for read 2.
|
description: FastQC report archive for read 2.
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_raw/$id.read_2.fastqc.zip
|
default: fastqc_raw/${id}_r2.fastqc.zip
|
||||||
- name: "--trim_html_1"
|
- name: "--trim_html_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_trim/$id.read_1.trimmed_fastqc.html
|
default: fastqc_trim/${id}_r1.trimmed_fastqc.html
|
||||||
- name: "--trim_html_2"
|
- name: "--trim_html_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_trim/$id.read_2.trimmed_fastqc.html
|
default: fastqc_trim/${id}_r2.trimmed_fastqc.html
|
||||||
- name: "--trim_zip_1"
|
- name: "--trim_zip_1"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_trim/$id.read_1.trimmed_fastqc.zip
|
default: fastqc_trim/${id}_r1.trimmed_fastqc.zip
|
||||||
- name: "--trim_zip_2"
|
- name: "--trim_zip_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: fastqc_trim/$id.read_2.trimmed_fastqc.zip
|
default: fastqc_trim/${id}_r2.trimmed_fastqc.zip
|
||||||
|
|
||||||
# TrimGalore
|
# TrimGalore
|
||||||
- name: "--trim_log_1"
|
- name: "--trim_log_1"
|
||||||
@@ -436,13 +408,13 @@ argument_groups:
|
|||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: trimgalore/$id.read_1.trimming_report.txt
|
default: trimgalore/${id}_r1.trimming_report.txt
|
||||||
- name: "--trim_log_2"
|
- name: "--trim_log_2"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
required: false
|
||||||
must_exist: false
|
must_exist: false
|
||||||
default: trimgalore/$id.read_2.trimming_report.txt
|
default: trimgalore/${id}_r2.trimming_report.txt
|
||||||
|
|
||||||
# fastp
|
# fastp
|
||||||
- name: --fastp_trim_json
|
- name: --fastp_trim_json
|
||||||
@@ -842,17 +814,21 @@ argument_groups:
|
|||||||
default: dupradar/intercept_slope/$id.intercept_slope.txt
|
default: dupradar/intercept_slope/$id.intercept_slope.txt
|
||||||
|
|
||||||
# Qualimap
|
# Qualimap
|
||||||
- name: "--qualimap_output_pdf"
|
- name: "--qualimap_qc_report"
|
||||||
|
direction: output
|
||||||
|
type: file
|
||||||
|
default: Qualimap/$id.rnaseq_qc_results.txt
|
||||||
|
description: Text file containing the RNAseq QC results.
|
||||||
|
- name: "--qualimap_counts"
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
default: Qualimap/$id.counts.txt
|
||||||
must_exist: false
|
description: Output file for computed counts.
|
||||||
default: qualimap/$id.qualimap_output.pdf
|
- name: "--qualimap_report"
|
||||||
- name: "--qualimap_output_dir"
|
|
||||||
type: file
|
type: file
|
||||||
direction: output
|
direction: output
|
||||||
required: false
|
default: Qualimap/$id.report.html
|
||||||
default: qualimap/$id
|
description: Report output file. Supported formats are PDF or HTML.
|
||||||
|
|
||||||
# DESeq2
|
# DESeq2
|
||||||
- name: "--deseq2_output"
|
- name: "--deseq2_output"
|
||||||
|
|||||||
@@ -419,8 +419,9 @@ workflow run_wf {
|
|||||||
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
||||||
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
||||||
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
||||||
"qualimap_output_dir": "qualimap_output_dir",
|
"qualimap_report": "qualimap_report",
|
||||||
"qualimap_output_pdf": "qualimap_output_pdf",
|
"qualimap_qc_report": "qualimap_qc_report",
|
||||||
|
"qualimap_counts": "qualimap_counts",
|
||||||
"featurecounts": "featurecounts",
|
"featurecounts": "featurecounts",
|
||||||
"featurecounts_summary": "featurecounts_summary",
|
"featurecounts_summary": "featurecounts_summary",
|
||||||
"featurecounts_multiqc": "featurecounts_multiqc",
|
"featurecounts_multiqc": "featurecounts_multiqc",
|
||||||
@@ -534,8 +535,9 @@ workflow run_wf {
|
|||||||
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
"dupradar_output_duprate_exp_denscurve_mqc": "dupradar_output_duprate_exp_denscurve_mqc",
|
||||||
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
"dupradar_output_expression_histogram": "dupradar_output_expression_histogram",
|
||||||
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
"dupradar_output_intercept_slope": "dupradar_output_intercept_slope",
|
||||||
"qualimap_output_dir": "qualimap_output_dir",
|
"qualimap_report": "qualimap_report",
|
||||||
"qualimap_output_pdf": "qualimap_output_pdf",
|
"qualimap_qc_report": "qualimap_qc_report",
|
||||||
|
"qualimap_counts": "qualimap_counts",
|
||||||
"tpm_gene": "tpm_gene",
|
"tpm_gene": "tpm_gene",
|
||||||
"counts_gene": "counts_gene",
|
"counts_gene": "counts_gene",
|
||||||
"counts_gene_length_scaled": "counts_gene_length_scaled",
|
"counts_gene_length_scaled": "counts_gene_length_scaled",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# viash ns build --setup cb --parallel
|
viash ns build --setup cb --parallel
|
||||||
|
|
||||||
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
|
cat > testData/minimal_test/input_fastq/sample_sheet.csv << HERE
|
||||||
id,fastq_1,fastq_2,strandedness
|
id,fastq_1,fastq_2,strandedness
|
||||||
@@ -19,7 +19,7 @@ nextflow run target/nextflow/workflows/rnaseq/main.nf \
|
|||||||
--gtf testData/minimal_test/reference/genes.gtf.gz \
|
--gtf testData/minimal_test/reference/genes.gtf.gz \
|
||||||
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
|
--additional_fasta testData/minimal_test/reference/gfp.fa.gz \
|
||||||
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
--transcript_fasta testData/minimal_test/reference/transcriptome.fasta \
|
||||||
--bbsplit_fasta_list testData/minimal_test/reference/bbsplit_fasta_list.txt \
|
--bbsplit_fasta_list "testData/minimal_test/reference/bbsplit_fasta/sarscov2.fa;testData/minimal_test/reference/bbsplit_fasta/human.fa" \
|
||||||
--skip_pseudo_alignment \
|
--skip_pseudo_alignment \
|
||||||
-profile docker \
|
-profile docker \
|
||||||
--resume
|
--resume
|
||||||
|
|||||||
@@ -0,0 +1,395 @@
|
|||||||
|
name: "bbmap_bbsplit"
|
||||||
|
namespace: "bbmap"
|
||||||
|
version: "main"
|
||||||
|
argument_groups:
|
||||||
|
- name: "Input"
|
||||||
|
arguments:
|
||||||
|
- type: "string"
|
||||||
|
name: "--id"
|
||||||
|
description: "Sample ID"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--paired"
|
||||||
|
description: "Paired fastq files or not?"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "file"
|
||||||
|
name: "--input"
|
||||||
|
description: "Input fastq files, either one or two (paired), separated by \";\"\
|
||||||
|
."
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "reads.fastq"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--ref"
|
||||||
|
description: "Reference FASTA files, separated by \";\". The primary reference\
|
||||||
|
\ should be specified first."
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--only_build_index"
|
||||||
|
description: "If set, only builds the index. Otherwise, mapping is performed."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "file"
|
||||||
|
name: "--build"
|
||||||
|
description: "Index to be used for mapping. \n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--qin"
|
||||||
|
description: "Set to 33 or 64 to specify input quality value ASCII offset. Automatically\
|
||||||
|
\ detected if\nnot specified.\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--interleaved"
|
||||||
|
description: "True forces paired/interleaved input; false forces single-ended\
|
||||||
|
\ mapping.\nIf not specified, interleaved status will be autodetected from read\
|
||||||
|
\ names.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--maxindel"
|
||||||
|
description: "Don't look for indels longer than this. Lower is faster. Set to\
|
||||||
|
\ >=100k for RNA-seq.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 20
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "double"
|
||||||
|
name: "--minratio"
|
||||||
|
description: "Fraction of max alignment score required to keep a site. Higher\
|
||||||
|
\ is faster.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0.56
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--minhits"
|
||||||
|
description: "Minimum number of seed hits required for candidate sites. Higher\
|
||||||
|
\ is faster.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 1
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--ambiguous"
|
||||||
|
description: "Set behavior on ambiguously-mapped reads (with multiple top-scoring\
|
||||||
|
\ mapping locations).\n * best Use the first best site (Default)\n * toss\
|
||||||
|
\ Consider unmapped\n * random Select one top-scoring site randomly\n \
|
||||||
|
\ * all Retain all top-scoring sites. Does not work yet with SAM output\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "best"
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "best"
|
||||||
|
- "toss"
|
||||||
|
- "random"
|
||||||
|
- "all"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--ambiguous2"
|
||||||
|
description: "Set behavior only for reads that map ambiguously to multiple different\
|
||||||
|
\ references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\n\
|
||||||
|
Ambiguous2 excludes reads that map ambiguously within a single reference.\n\
|
||||||
|
\ * best Use the first best site (Default)\n * toss Consider unmapped\n\
|
||||||
|
\ * all Write a copy to the output for each reference to which it maps\n\
|
||||||
|
\ * split Write a copy to the AMBIGUOUS_ output for each reference to which\
|
||||||
|
\ it maps\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "best"
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "best"
|
||||||
|
- "toss"
|
||||||
|
- "all"
|
||||||
|
- "split"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--qtrim"
|
||||||
|
description: "Quality-trim ends to Q5 before mapping. Options are 'l' (left),\
|
||||||
|
\ 'r' (right), and 'lr' (both).\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "l"
|
||||||
|
- "r"
|
||||||
|
- "lr"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--untrim"
|
||||||
|
description: "Undo trimming after mapping. Untrimmed bases will be soft-clipped\
|
||||||
|
\ in cigar strings."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- name: "Output"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--index"
|
||||||
|
description: "Location to write the index.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "BBSplit_index"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--fastq_1"
|
||||||
|
description: "Output file for read 1.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "read_out1.fastq"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--fastq_2"
|
||||||
|
description: "Output file for read 2.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "read_out2.fastq"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--sam2bam"
|
||||||
|
alternatives:
|
||||||
|
- "--bs"
|
||||||
|
description: "Write a shell script to 'file' that will turn the sam output into\
|
||||||
|
\ a sorted, indexed bam file.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "script.sh"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--scafstats"
|
||||||
|
description: "Write statistics on how many reads mapped to which scaffold to this\
|
||||||
|
\ file.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "scaffold_stats.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--refstats"
|
||||||
|
description: "Write statistics on how many reads were assigned to which reference\
|
||||||
|
\ to this file.\nUnmapped reads whose mate mapped to a reference are considered\
|
||||||
|
\ assigned and will be counted.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "reference_stats.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--nzo"
|
||||||
|
description: "Only print lines with nonzero coverage."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "string"
|
||||||
|
name: "--bbmap_args"
|
||||||
|
description: "Additional arguments from BBMap to pass to BBSplit.\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "script.sh"
|
||||||
|
is_executable: true
|
||||||
|
description: "Split sequencing reads by mapping them to multiple references simultaneously."
|
||||||
|
test_resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "test.sh"
|
||||||
|
is_executable: true
|
||||||
|
info: null
|
||||||
|
status: "enabled"
|
||||||
|
requirements:
|
||||||
|
commands:
|
||||||
|
- "ps"
|
||||||
|
license: "BBTools Copyright (c) 2014"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh"
|
||||||
|
homepage: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/"
|
||||||
|
documentation: "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/"
|
||||||
|
runners:
|
||||||
|
- type: "executable"
|
||||||
|
id: "executable"
|
||||||
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||||
|
- type: "nextflow"
|
||||||
|
id: "nextflow"
|
||||||
|
directives:
|
||||||
|
tag: "$id"
|
||||||
|
auto:
|
||||||
|
simplifyInput: true
|
||||||
|
simplifyOutput: false
|
||||||
|
transcript: false
|
||||||
|
publish: false
|
||||||
|
config:
|
||||||
|
labels:
|
||||||
|
mem1gb: "memory = 1000000000.B"
|
||||||
|
mem2gb: "memory = 2000000000.B"
|
||||||
|
mem5gb: "memory = 5000000000.B"
|
||||||
|
mem10gb: "memory = 10000000000.B"
|
||||||
|
mem20gb: "memory = 20000000000.B"
|
||||||
|
mem50gb: "memory = 50000000000.B"
|
||||||
|
mem100gb: "memory = 100000000000.B"
|
||||||
|
mem200gb: "memory = 200000000000.B"
|
||||||
|
mem500gb: "memory = 500000000000.B"
|
||||||
|
mem1tb: "memory = 1000000000000.B"
|
||||||
|
mem2tb: "memory = 2000000000000.B"
|
||||||
|
mem5tb: "memory = 5000000000000.B"
|
||||||
|
mem10tb: "memory = 10000000000000.B"
|
||||||
|
mem20tb: "memory = 20000000000000.B"
|
||||||
|
mem50tb: "memory = 50000000000000.B"
|
||||||
|
mem100tb: "memory = 100000000000000.B"
|
||||||
|
mem200tb: "memory = 200000000000000.B"
|
||||||
|
mem500tb: "memory = 500000000000000.B"
|
||||||
|
mem1gib: "memory = 1073741824.B"
|
||||||
|
mem2gib: "memory = 2147483648.B"
|
||||||
|
mem4gib: "memory = 4294967296.B"
|
||||||
|
mem8gib: "memory = 8589934592.B"
|
||||||
|
mem16gib: "memory = 17179869184.B"
|
||||||
|
mem32gib: "memory = 34359738368.B"
|
||||||
|
mem64gib: "memory = 68719476736.B"
|
||||||
|
mem128gib: "memory = 137438953472.B"
|
||||||
|
mem256gib: "memory = 274877906944.B"
|
||||||
|
mem512gib: "memory = 549755813888.B"
|
||||||
|
mem1tib: "memory = 1099511627776.B"
|
||||||
|
mem2tib: "memory = 2199023255552.B"
|
||||||
|
mem4tib: "memory = 4398046511104.B"
|
||||||
|
mem8tib: "memory = 8796093022208.B"
|
||||||
|
mem16tib: "memory = 17592186044416.B"
|
||||||
|
mem32tib: "memory = 35184372088832.B"
|
||||||
|
mem64tib: "memory = 70368744177664.B"
|
||||||
|
mem128tib: "memory = 140737488355328.B"
|
||||||
|
mem256tib: "memory = 281474976710656.B"
|
||||||
|
mem512tib: "memory = 562949953421312.B"
|
||||||
|
cpu1: "cpus = 1"
|
||||||
|
cpu2: "cpus = 2"
|
||||||
|
cpu5: "cpus = 5"
|
||||||
|
cpu10: "cpus = 10"
|
||||||
|
cpu20: "cpus = 20"
|
||||||
|
cpu50: "cpus = 50"
|
||||||
|
cpu100: "cpus = 100"
|
||||||
|
cpu200: "cpus = 200"
|
||||||
|
cpu500: "cpus = 500"
|
||||||
|
cpu1000: "cpus = 1000"
|
||||||
|
debug: false
|
||||||
|
container: "docker"
|
||||||
|
engines:
|
||||||
|
- type: "docker"
|
||||||
|
id: "docker"
|
||||||
|
image: "ubuntu:22.04"
|
||||||
|
target_registry: "images.viash-hub.com"
|
||||||
|
target_tag: "main"
|
||||||
|
namespace_separator: "/"
|
||||||
|
setup:
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "apt-get update && \\\napt-get install -y build-essential openjdk-17-jdk wget\
|
||||||
|
\ tar && \\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz\
|
||||||
|
\ && \\\ntar xzf BBMap_39.01.tar.gz && \\\ncp -r bbmap/* /usr/local/bin\n"
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \"BBMAP:\", $NF}' >\
|
||||||
|
\ /var/software_versions.txt\n"
|
||||||
|
entrypoint: []
|
||||||
|
cmd: null
|
||||||
|
- type: "native"
|
||||||
|
id: "native"
|
||||||
|
build_info:
|
||||||
|
config: "src/bbmap/bbmap_bbsplit/config.vsh.yaml"
|
||||||
|
runner: "nextflow"
|
||||||
|
engine: "docker|native"
|
||||||
|
output: "target/nextflow/bbmap/bbmap_bbsplit"
|
||||||
|
executable: "target/nextflow/bbmap/bbmap_bbsplit/main.nf"
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
|
package_config:
|
||||||
|
name: "biobox"
|
||||||
|
version: "main"
|
||||||
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
|
info: null
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
source: "src"
|
||||||
|
target: "target"
|
||||||
|
config_mods:
|
||||||
|
- ".requirements.commands := ['ps']\n"
|
||||||
|
- ".engines += { type: \"native\" }"
|
||||||
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
// umitools_extract main
|
// bbmap_bbsplit main
|
||||||
//
|
//
|
||||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||||
@@ -2804,19 +2804,91 @@ nextflow.enable.dsl=2
|
|||||||
meta = [
|
meta = [
|
||||||
"resources_dir": moduleDir.toRealPath().normalize(),
|
"resources_dir": moduleDir.toRealPath().normalize(),
|
||||||
"config": processConfig(readJsonBlob('''{
|
"config": processConfig(readJsonBlob('''{
|
||||||
"name" : "umitools_extract",
|
"name" : "bbmap_bbsplit",
|
||||||
"namespace" : "umitools",
|
"namespace" : "bbmap",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"argument_groups" : [
|
"argument_groups" : [
|
||||||
{
|
{
|
||||||
"name" : "Input",
|
"name" : "Input",
|
||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "boolean",
|
"type" : "string",
|
||||||
|
"name" : "--id",
|
||||||
|
"description" : "Sample ID",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
"name" : "--paired",
|
"name" : "--paired",
|
||||||
"description" : "Paired fastq files or not?",
|
"description" : "Paired fastq files or not?",
|
||||||
"default" : [
|
"direction" : "input"
|
||||||
false
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--input",
|
||||||
|
"description" : "Input fastq files, either one or two (paired), separated by \\";\\".",
|
||||||
|
"example" : [
|
||||||
|
"reads.fastq"
|
||||||
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : true,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--ref",
|
||||||
|
"description" : "Reference FASTA files, separated by \\";\\". The primary reference should be specified first.",
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : true,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--only_build_index",
|
||||||
|
"description" : "If set, only builds the index. Otherwise, mapping is performed.",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--build",
|
||||||
|
"description" : "Index to be used for mapping. \n",
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--qin",
|
||||||
|
"description" : "Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--interleaved",
|
||||||
|
"description" : "True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--maxindel",
|
||||||
|
"description" : "Don't look for indels longer than this. Lower is faster. Set to >=100k for RNA-seq.\n",
|
||||||
|
"example" : [
|
||||||
|
20
|
||||||
],
|
],
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
@@ -2824,27 +2896,84 @@ meta = [
|
|||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "double",
|
||||||
"name" : "--input",
|
"name" : "--minratio",
|
||||||
"description" : "Input fastq files, either one or two (paired)",
|
"description" : "Fraction of max alignment score required to keep a site. Higher is faster.\n",
|
||||||
"example" : [
|
"example" : [
|
||||||
"sample.fastq"
|
0.56
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"required" : false,
|
||||||
"create_parent" : true,
|
|
||||||
"required" : true,
|
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : true,
|
"multiple" : false,
|
||||||
"multiple_sep" : ","
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--minhits",
|
||||||
|
"description" : "Minimum number of seed hits required for candidate sites. Higher is faster.\n",
|
||||||
|
"example" : [
|
||||||
|
1
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "string",
|
||||||
"name" : "--bc_pattern",
|
"name" : "--ambiguous",
|
||||||
"description" : "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides of the read are from the UMI.",
|
"description" : "Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n",
|
||||||
|
"example" : [
|
||||||
|
"best"
|
||||||
|
],
|
||||||
"required" : false,
|
"required" : false,
|
||||||
|
"choices" : [
|
||||||
|
"best",
|
||||||
|
"toss",
|
||||||
|
"random",
|
||||||
|
"all"
|
||||||
|
],
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : true,
|
"multiple" : false,
|
||||||
"multiple_sep" : ","
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--ambiguous2",
|
||||||
|
"description" : "Set behavior only for reads that map ambiguously to multiple different references.\nNormal 'ambiguous=' controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n",
|
||||||
|
"example" : [
|
||||||
|
"best"
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"choices" : [
|
||||||
|
"best",
|
||||||
|
"toss",
|
||||||
|
"all",
|
||||||
|
"split"
|
||||||
|
],
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--qtrim",
|
||||||
|
"description" : "Quality-trim ends to Q5 before mapping. Options are 'l' (left), 'r' (right), and 'lr' (both).\n",
|
||||||
|
"required" : false,
|
||||||
|
"choices" : [
|
||||||
|
"l",
|
||||||
|
"r",
|
||||||
|
"lr"
|
||||||
|
],
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--untrim",
|
||||||
|
"description" : "Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings.",
|
||||||
|
"direction" : "input"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -2853,14 +2982,28 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--fastq_1",
|
"name" : "--index",
|
||||||
"description" : "Output file for read 1.",
|
"description" : "Location to write the index.\n",
|
||||||
"default" : [
|
"example" : [
|
||||||
"$id.$key.read_1.fastq"
|
"BBSplit_index"
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : true,
|
"required" : false,
|
||||||
|
"direction" : "output",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--fastq_1",
|
||||||
|
"description" : "Output file for read 1.\n",
|
||||||
|
"example" : [
|
||||||
|
"read_out1.fastq"
|
||||||
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
@@ -2868,82 +3011,73 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--fastq_2",
|
"name" : "--fastq_2",
|
||||||
"description" : "Output file for read 2.",
|
"description" : "Output file for read 2.\n",
|
||||||
"default" : [
|
"example" : [
|
||||||
"$id.$key.read_2.fastq"
|
"read_out2.fastq"
|
||||||
],
|
],
|
||||||
"must_exist" : false,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
}
|
},
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name" : "Optional arguments",
|
|
||||||
"arguments" : [
|
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "file",
|
||||||
"name" : "--umitools_extract_method",
|
"name" : "--sam2bam",
|
||||||
"description" : "UMI pattern to use.",
|
"alternatives" : [
|
||||||
"default" : [
|
"--bs"
|
||||||
"string"
|
|
||||||
],
|
],
|
||||||
|
"description" : "Write a shell script to 'file' that will turn the sam output into a sorted, indexed bam file.\n",
|
||||||
|
"example" : [
|
||||||
|
"script.sh"
|
||||||
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"choices" : [
|
"direction" : "output",
|
||||||
"string",
|
|
||||||
"regex"
|
|
||||||
],
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "file",
|
||||||
"name" : "--umitools_umi_separator",
|
"name" : "--scafstats",
|
||||||
"description" : "The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with UMI-tools and used other software.",
|
"description" : "Write statistics on how many reads mapped to which scaffold to this file.\n",
|
||||||
"default" : [
|
"example" : [
|
||||||
"_"
|
"scaffold_stats.txt"
|
||||||
],
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "file",
|
||||||
"name" : "--umitools_grouping_method",
|
"name" : "--refstats",
|
||||||
"description" : "Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying the reads with the same mapping position, but treat similar yet nonidentical UMIs differently.",
|
"description" : "Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n",
|
||||||
"default" : [
|
"example" : [
|
||||||
"directional"
|
"reference_stats.txt"
|
||||||
],
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"choices" : [
|
"direction" : "output",
|
||||||
"unique",
|
|
||||||
"percentile",
|
|
||||||
"cluster",
|
|
||||||
"adjacency",
|
|
||||||
"directional"
|
|
||||||
],
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "integer",
|
"type" : "boolean_true",
|
||||||
"name" : "--umi_discard_read",
|
"name" : "--nzo",
|
||||||
"description" : "After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively.",
|
"description" : "Only print lines with nonzero coverage.",
|
||||||
"default" : [
|
"direction" : "input"
|
||||||
0
|
},
|
||||||
],
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--bbmap_args",
|
||||||
|
"description" : "Additional arguments from BBMap to pass to BBSplit.\n",
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"choices" : [
|
|
||||||
0,
|
|
||||||
1,
|
|
||||||
2
|
|
||||||
],
|
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
@@ -2958,56 +3092,26 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. See https://umi-tools.readthedocs.io/en/latest/ for more information.\nThis component flexible removes UMI sequences from fastq reads. UMIs are removed and appended to the read name.\nThis component extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place\n",
|
"description" : "Split sequencing reads by mapping them to multiple references simultaneously.",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
"path" : "test.sh",
|
"path" : "test.sh",
|
||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/unit_test_resources/scrb_seq_fastq.1.gz"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/unit_test_resources/scrb_seq_fastq.2.gz"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/unit_test_resources/slim.fastq.gz"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/umitools/extract/main.nf",
|
|
||||||
"modules/nf-core/umitools/extract/meta.yml"
|
|
||||||
],
|
|
||||||
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"license" : "BBTools Copyright (c) 2014",
|
||||||
{
|
"links" : {
|
||||||
"type" : "vsh",
|
"repository" : "https://github.com/BioInfoTools/BBMap/blob/master/sh/bbsplit.sh",
|
||||||
"name" : "biobox",
|
"homepage" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/",
|
||||||
"repo" : "vsh/biobox",
|
"documentation" : "https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbmap-guide/"
|
||||||
"tag" : "main"
|
},
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3092,19 +3196,16 @@ meta = [
|
|||||||
"namespace_separator" : "/",
|
"namespace_separator" : "/",
|
||||||
"setup" : [
|
"setup" : [
|
||||||
{
|
{
|
||||||
"type" : "apt",
|
"type" : "docker",
|
||||||
"packages" : [
|
"run" : [
|
||||||
"pip"
|
"apt-get update && \\\\\napt-get install -y build-essential openjdk-17-jdk wget tar && \\\\\nwget --no-check-certificate https://sourceforge.net/projects/bbmap/files/BBMap_39.01.tar.gz && \\\\\ntar xzf BBMap_39.01.tar.gz && \\\\\ncp -r bbmap/* /usr/local/bin\n"
|
||||||
],
|
]
|
||||||
"interactive" : false
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "python",
|
"type" : "docker",
|
||||||
"user" : false,
|
"run" : [
|
||||||
"packages" : [
|
"bbsplit.sh --version 2>&1 | awk '/BBMap version/{print \\"BBMAP:\\", $NF}' > /var/software_versions.txt\n"
|
||||||
"umi_tools"
|
]
|
||||||
],
|
|
||||||
"upgrade" : true
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -3114,49 +3215,39 @@ meta = [
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"build_info" : {
|
"build_info" : {
|
||||||
"config" : "/workdir/root/repo/src/umitools/umitools_extract/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/bbmap/bbmap_bbsplit/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/umitools/umitools_extract",
|
"output" : "target/nextflow/bbmap/bbmap_bbsplit",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3170,17 +3261,33 @@ def innerWorkflowFactory(args) {
|
|||||||
def rawScript = '''set -e
|
def rawScript = '''set -e
|
||||||
tempscript=".viash_script.sh"
|
tempscript=".viash_script.sh"
|
||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
|
$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_BC_PATTERN+x} ]; then echo "${VIASH_PAR_BC_PATTERN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bc_pattern='&'#" ; else echo "# par_bc_pattern="; fi )
|
$( if [ ! -z ${VIASH_PAR_REF+x} ]; then echo "${VIASH_PAR_REF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ref='&'#" ; else echo "# par_ref="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_ONLY_BUILD_INDEX+x} ]; then echo "${VIASH_PAR_ONLY_BUILD_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_only_build_index='&'#" ; else echo "# par_only_build_index="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_BUILD+x} ]; then echo "${VIASH_PAR_BUILD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_build='&'#" ; else echo "# par_build="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_QIN+x} ]; then echo "${VIASH_PAR_QIN}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qin='&'#" ; else echo "# par_qin="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_INTERLEAVED+x} ]; then echo "${VIASH_PAR_INTERLEAVED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_interleaved='&'#" ; else echo "# par_interleaved="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MAXINDEL+x} ]; then echo "${VIASH_PAR_MAXINDEL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_maxindel='&'#" ; else echo "# par_maxindel="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MINRATIO+x} ]; then echo "${VIASH_PAR_MINRATIO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minratio='&'#" ; else echo "# par_minratio="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MINHITS+x} ]; then echo "${VIASH_PAR_MINHITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_minhits='&'#" ; else echo "# par_minhits="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_AMBIGUOUS+x} ]; then echo "${VIASH_PAR_AMBIGUOUS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous='&'#" ; else echo "# par_ambiguous="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_AMBIGUOUS2+x} ]; then echo "${VIASH_PAR_AMBIGUOUS2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ambiguous2='&'#" ; else echo "# par_ambiguous2="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_QTRIM+x} ]; then echo "${VIASH_PAR_QTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qtrim='&'#" ; else echo "# par_qtrim="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_UNTRIM+x} ]; then echo "${VIASH_PAR_UNTRIM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_untrim='&'#" ; else echo "# par_untrim="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi )
|
$( if [ ! -z ${VIASH_PAR_FASTQ_1+x} ]; then echo "${VIASH_PAR_FASTQ_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_1='&'#" ; else echo "# par_fastq_1="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi )
|
$( if [ ! -z ${VIASH_PAR_FASTQ_2+x} ]; then echo "${VIASH_PAR_FASTQ_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fastq_2='&'#" ; else echo "# par_fastq_2="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_UMITOOLS_EXTRACT_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_EXTRACT_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_extract_method='&'#" ; else echo "# par_umitools_extract_method="; fi )
|
$( if [ ! -z ${VIASH_PAR_SAM2BAM+x} ]; then echo "${VIASH_PAR_SAM2BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sam2bam='&'#" ; else echo "# par_sam2bam="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_UMITOOLS_UMI_SEPARATOR+x} ]; then echo "${VIASH_PAR_UMITOOLS_UMI_SEPARATOR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_umi_separator='&'#" ; else echo "# par_umitools_umi_separator="; fi )
|
$( if [ ! -z ${VIASH_PAR_SCAFSTATS+x} ]; then echo "${VIASH_PAR_SCAFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scafstats='&'#" ; else echo "# par_scafstats="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_UMITOOLS_GROUPING_METHOD+x} ]; then echo "${VIASH_PAR_UMITOOLS_GROUPING_METHOD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umitools_grouping_method='&'#" ; else echo "# par_umitools_grouping_method="; fi )
|
$( if [ ! -z ${VIASH_PAR_REFSTATS+x} ]; then echo "${VIASH_PAR_REFSTATS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_refstats='&'#" ; else echo "# par_refstats="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_UMI_DISCARD_READ+x} ]; then echo "${VIASH_PAR_UMI_DISCARD_READ}" | sed "s#'#'\\"'\\"'#g;s#.*#par_umi_discard_read='&'#" ; else echo "# par_umi_discard_read="; fi )
|
$( if [ ! -z ${VIASH_PAR_NZO+x} ]; then echo "${VIASH_PAR_NZO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nzo='&'#" ; else echo "# par_nzo="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_BBMAP_ARGS+x} ]; then echo "${VIASH_PAR_BBMAP_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bbmap_args='&'#" ; else echo "# par_bbmap_args="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
||||||
@@ -3201,7 +3308,6 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
|
|||||||
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
||||||
|
|
||||||
## VIASH END
|
## VIASH END
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
@@ -3210,56 +3316,85 @@ function clean_up {
|
|||||||
}
|
}
|
||||||
trap clean_up EXIT
|
trap clean_up EXIT
|
||||||
|
|
||||||
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX")
|
unset_if_false=( par_paired par_only_build_index par_interleaved par_untrim par_nzo)
|
||||||
|
|
||||||
IFS="," read -ra input <<< "\\$par_input"
|
for var in "\\${unset_if_false[@]}"; do
|
||||||
IFS="," read -ra pattern <<< "\\$par_bc_pattern"
|
if [ -z "\\${!var}" ]; then
|
||||||
|
unset \\$var
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
read_count="\\${#input[@]}"
|
if [ ! -d "\\$par_build" ]; then
|
||||||
pattern_count="\\${#pattern[@]}"
|
IFS=";" read -ra ref_files <<< "\\$par_ref"
|
||||||
|
primary_ref="\\${ref_files[0]}"
|
||||||
|
refs=()
|
||||||
|
for file in "\\${ref_files[@]:1}"
|
||||||
|
do
|
||||||
|
name=\\$(basename "\\$file" | sed 's/\\\\.[^.]*\\$//')
|
||||||
|
refs+=("ref_\\$name=\\$file")
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "\\$par_paired" == "true" ]; then
|
if \\$par_only_build_index; then
|
||||||
echo "Paired - Reads: \\$read_count bc_patterns: \\$pattern_count"
|
if [ "\\${#refs[@]}" -gt 1 ]; then
|
||||||
if [ "\\$read_count" -ne 2 ] || [ "\\$pattern_count" -ne 2 ]; then
|
bbsplit.sh \\\\
|
||||||
echo "Paired end input requires two read files and two UMI patterns"
|
--ref_primary="\\$primary_ref" \\\\
|
||||||
exit 1
|
"\\${refs[@]}" \\\\
|
||||||
|
path=\\$par_index
|
||||||
else
|
else
|
||||||
read1="\\$(basename -- \\${input[0]})"
|
echo "ERROR: Please specify at least two reference fasta files."
|
||||||
read2="\\$(basename -- \\${input[1]})"
|
|
||||||
umi_tools extract \\\\
|
|
||||||
-I "\\${input[0]}" --read2-in="\\${input[1]}" \\\\
|
|
||||||
-S "\\$tmpdir/\\$read1" \\\\
|
|
||||||
--read2-out="\\$tmpdir/\\$read2" \\\\
|
|
||||||
--extract-method \\$par_umitools_extract_method \\\\
|
|
||||||
--bc-pattern "\\${pattern[0]}" \\\\
|
|
||||||
--bc-pattern2 "\\${pattern[1]}" \\\\
|
|
||||||
--umi-separator \\$par_umitools_umi_separator
|
|
||||||
if [ \\$par_umi_discard_read == 1 ]; then
|
|
||||||
# discard read 1
|
|
||||||
cp \\$tmpdir/\\$read1 \\$par_fastq_1
|
|
||||||
elif [ \\$par_umi_discard_read == 2 ]; then
|
|
||||||
# discard read 2
|
|
||||||
cp \\$tmpdir/\\$read2 \\$par_fastq_1
|
|
||||||
else
|
|
||||||
cp \\$tmpdir/\\$read1 \\$par_fastq_1
|
|
||||||
cp \\$tmpdir/\\$read2 \\$par_fastq_2
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Not Paired - \\$read_count"
|
IFS=";" read -ra input <<< "\\$par_input"
|
||||||
if [ "\\$read_count" -ne 1 ] || [ "\\$pattern_count" -ne 1 ]; then
|
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_name-XXXXXXXX")
|
||||||
echo "Single end input requires one read file and one UMI pattern"
|
index_files=''
|
||||||
exit 1
|
if [ -d "\\$par_build" ]; then
|
||||||
|
index_files="path=\\$par_build"
|
||||||
|
elif [ \\${#refs[@]} -gt 0 ]; then
|
||||||
|
index_files="--ref_primary=\\$primary_ref \\${refs[*]}"
|
||||||
else
|
else
|
||||||
read1="\\$(basename -- \\${input[0]})"
|
echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files."
|
||||||
umi_tools extract \\\\
|
fi
|
||||||
-I "\\${input[0]}" -S "\\$tmpdir/\\$read1" \\\\
|
|
||||||
--extract-method \\$par_umitools_extract_method \\\\
|
extra_args=""
|
||||||
--bc-pattern "\\${pattern[0]}" \\\\
|
if [ -f "\\$par_refstats" ]; then extra_args+=" --refstats \\$par_refstats"; fi
|
||||||
--umi-separator \\$par_umitools_umi_separator
|
if [ -n "\\$par_ambiguous" ]; then extra_args+=" --ambiguous \\$par_ambiguous"; fi
|
||||||
cp \\$tmpdir/\\$read1 \\$par_fastq_1
|
if [ -n "\\$par_ambiguous2" ]; then extra_args+=" --ambiguous2 \\$par_ambiguous2"; fi
|
||||||
|
if [ -n "\\$par_minratio" ]; then extra_args+=" --minratio \\$par_minratio"; fi
|
||||||
|
if [ -n "\\$par_minhits" ]; then extra_args+=" --minhits \\$par_minhits"; fi
|
||||||
|
if [ -n "\\$par_maxindel" ]; then extra_args+=" --maxindel \\$par_maxindel"; fi
|
||||||
|
if [ -n "\\$par_qin" ]; then extra_args+=" --qin \\$par_qin"; fi
|
||||||
|
if [ -n "\\$par_qtrim" ]; then extra_args+=" --qtrim \\$par_qtrim"; fi
|
||||||
|
if [ "\\$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi
|
||||||
|
if [ "\\$par_untrim" = true ]; then extra_args+=" --untrim"; fi
|
||||||
|
if [ "\\$par_nzo" = true ]; then extra_args+=" --nzo"; fi
|
||||||
|
|
||||||
|
if [ -n "\\$par_bbmap_args" ]; then extra_args+=" \\$par_bbmap_args"; fi
|
||||||
|
|
||||||
|
|
||||||
|
if \\$par_paired; then
|
||||||
|
bbsplit.sh \\\\
|
||||||
|
\\$index_files \\\\
|
||||||
|
in=\\${input[0]} \\\\
|
||||||
|
in2=\\${input[1]} \\\\
|
||||||
|
basename=\\${tmpdir}/%_#.fastq \\\\
|
||||||
|
\\$extra_args
|
||||||
|
read1=\\$(find \\$tmpdir/ -iname primary_1*)
|
||||||
|
read2=\\$(find \\$tmpdir/ -iname primary_2*)
|
||||||
|
cp \\$read1 \\$par_fastq_1
|
||||||
|
cp \\$read2 \\$par_fastq_2
|
||||||
|
else
|
||||||
|
bbsplit.sh \\\\
|
||||||
|
\\$index_files \\\\
|
||||||
|
in=\\${input[0]} \\\\
|
||||||
|
basename=\\${tmpdir}/%.fastq \\\\
|
||||||
|
\\$extra_args
|
||||||
|
read1=\\$(find \\$tmpdir/ -iname primary*)
|
||||||
|
cp \\$read1 \\$par_fastq_1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3620,7 +3755,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/umitools/umitools_extract",
|
"image" : "vsh/biobox/bbmap/bbmap_bbsplit",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
manifest {
|
manifest {
|
||||||
name = 'bbmap_bbsplit'
|
name = 'bbmap/bbmap_bbsplit'
|
||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
nextflowVersion = '!>=20.12.1-edge'
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
version = 'main'
|
version = 'main'
|
||||||
description = 'Split sequencing reads by mapping them to multiple references simultaneously.\n'
|
description = 'Split sequencing reads by mapping them to multiple references simultaneously.'
|
||||||
}
|
}
|
||||||
|
|
||||||
process.container = 'nextflow/bash:latest'
|
process.container = 'nextflow/bash:latest'
|
||||||
@@ -0,0 +1,321 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
|
"title": "bbmap_bbsplit",
|
||||||
|
"description": "Split sequencing reads by mapping them to multiple references simultaneously.",
|
||||||
|
"type": "object",
|
||||||
|
"definitions": {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"input" : {
|
||||||
|
"title": "Input",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"id": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`. Sample ID",
|
||||||
|
"help_text": "Type: `string`. Sample ID"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"paired": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Paired fastq files or not?",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Paired fastq files or not?"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"input": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"",
|
||||||
|
"help_text": "Type: List of `file`, example: `reads.fastq`, multiple_sep: `\";\"`. Input fastq files, either one or two (paired), separated by \";\"."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ref": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\"",
|
||||||
|
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Reference FASTA files, separated by \";\". The primary reference should be specified first."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"only_build_index": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. If set, only builds the index",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. If set, only builds the index. Otherwise, mapping is performed."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"build": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Index to be used for mapping",
|
||||||
|
"help_text": "Type: `file`. Index to be used for mapping. \n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"qin": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset",
|
||||||
|
"help_text": "Type: `string`. Set to 33 or 64 to specify input quality value ASCII offset. Automatically detected if\nnot specified.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"interleaved": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. True forces paired/interleaved input; false forces single-ended mapping.\nIf not specified, interleaved status will be autodetected from read names.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"maxindel": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this",
|
||||||
|
"help_text": "Type: `integer`, example: `20`. Don\u0027t look for indels longer than this. Lower is faster. Set to \u003e=100k for RNA-seq.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"minratio": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site",
|
||||||
|
"help_text": "Type: `double`, example: `0.56`. Fraction of max alignment score required to keep a site. Higher is faster.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"minhits": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites",
|
||||||
|
"help_text": "Type: `integer`, example: `1`. Minimum number of seed hits required for candidate sites. Higher is faster.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ambiguous": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations)",
|
||||||
|
"help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `random`, `all``. Set behavior on ambiguously-mapped reads (with multiple top-scoring mapping locations).\n * best Use the first best site (Default)\n * toss Consider unmapped\n * random Select one top-scoring site randomly\n * all Retain all top-scoring sites. Does not work yet with SAM output\n",
|
||||||
|
"enum": ["best", "toss", "random", "all"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ambiguous2": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references",
|
||||||
|
"help_text": "Type: `string`, example: `best`, choices: ``best`, `toss`, `all`, `split``. Set behavior only for reads that map ambiguously to multiple different references.\nNormal \u0027ambiguous=\u0027 controls behavior on all ambiguous reads;\nAmbiguous2 excludes reads that map ambiguously within a single reference.\n * best Use the first best site (Default)\n * toss Consider unmapped\n * all Write a copy to the output for each reference to which it maps\n * split Write a copy to the AMBIGUOUS_ output for each reference to which it maps\n",
|
||||||
|
"enum": ["best", "toss", "all", "split"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"qtrim": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping",
|
||||||
|
"help_text": "Type: `string`, choices: ``l`, `r`, `lr``. Quality-trim ends to Q5 before mapping. Options are \u0027l\u0027 (left), \u0027r\u0027 (right), and \u0027lr\u0027 (both).\n",
|
||||||
|
"enum": ["l", "r", "lr"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"untrim": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Undo trimming after mapping",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Undo trimming after mapping. Untrimmed bases will be soft-clipped in cigar strings."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"output" : {
|
||||||
|
"title": "Output",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"index": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.index.index`, example: `BBSplit_index`. Location to write the index.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.index.index"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fastq_1": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`, example: `read_out1.fastq`. Output file for read 1.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.fastq_1.fastq"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fastq_2": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`, example: `read_out2.fastq`. Output file for read 2.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.fastq_2.fastq"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"sam2bam": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.sam2bam.sh`, example: `script.sh`. Write a shell script to \u0027file\u0027 that will turn the sam output into a sorted, indexed bam file.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.sam2bam.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"scafstats": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.scafstats.txt`, example: `scaffold_stats.txt`. Write statistics on how many reads mapped to which scaffold to this file.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.scafstats.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"refstats": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.refstats.txt`, example: `reference_stats.txt`. Write statistics on how many reads were assigned to which reference to this file.\nUnmapped reads whose mate mapped to a reference are considered assigned and will be counted.\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.refstats.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"nzo": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Only print lines with nonzero coverage."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bbmap_args": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`. Additional arguments from BBMap to pass to BBSplit",
|
||||||
|
"help_text": "Type: `string`. Additional arguments from BBMap to pass to BBSplit.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"nextflow input-output arguments" : {
|
||||||
|
"title": "Nextflow input-output arguments",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"publish_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||||
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"param_list": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||||
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||||
|
"hidden": true
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/input"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/output"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,364 @@
|
|||||||
|
name: "bedtools_genomecov"
|
||||||
|
namespace: "bedtools"
|
||||||
|
version: "main"
|
||||||
|
authors:
|
||||||
|
- name: "Theodoro Gasperin Terra Camargo"
|
||||||
|
roles:
|
||||||
|
- "author"
|
||||||
|
- "maintainer"
|
||||||
|
info:
|
||||||
|
links:
|
||||||
|
email: "theodorogtc@gmail.com"
|
||||||
|
github: "tgaspe"
|
||||||
|
linkedin: "theodoro-gasperin-terra-camargo"
|
||||||
|
organizations:
|
||||||
|
- name: "Data Intuitive"
|
||||||
|
href: "https://www.data-intuitive.com"
|
||||||
|
role: "Bioinformatician"
|
||||||
|
argument_groups:
|
||||||
|
- name: "Inputs"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--input"
|
||||||
|
alternatives:
|
||||||
|
- "-i"
|
||||||
|
description: "The input file (BED/GFF/VCF) to be used.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "input.bed"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--input_bam"
|
||||||
|
alternatives:
|
||||||
|
- "-ibam"
|
||||||
|
description: "The input file is in BAM format.\nNote: BAM _must_ be sorted by\
|
||||||
|
\ positions.\n'--genome' option is ignored if you use '--input_bam' option!\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--genome"
|
||||||
|
alternatives:
|
||||||
|
- "-g"
|
||||||
|
description: "The genome file to be used.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "genome.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- name: "Outputs"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--output"
|
||||||
|
description: "The output BED file. \n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "output.bed"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: true
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- name: "Options"
|
||||||
|
arguments:
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--depth"
|
||||||
|
alternatives:
|
||||||
|
- "-d"
|
||||||
|
description: "Report the depth at each genome position (with one-based coordinates).\n\
|
||||||
|
Default behavior is to report a histogram.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--depth_zero"
|
||||||
|
alternatives:
|
||||||
|
- "-dz"
|
||||||
|
description: "Report the depth at each genome position (with zero-based coordinates).\n\
|
||||||
|
Reports only non-zero positions.\nDefault behavior is to report a histogram.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--bed_graph"
|
||||||
|
alternatives:
|
||||||
|
- "-bg"
|
||||||
|
description: "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--bed_graph_zero_coverage"
|
||||||
|
alternatives:
|
||||||
|
- "-bga"
|
||||||
|
description: "Report depth in BedGraph format, as above (-bg).\nHowever with this\
|
||||||
|
\ option, regions with zero \ncoverage are also reported. This allows one to\n\
|
||||||
|
quickly extract all regions of a genome with 0 \ncoverage by applying: \"grep\
|
||||||
|
\ -w 0$\" to the output.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--split"
|
||||||
|
description: "Treat \"split\" BAM or BED12 entries as distinct BED intervals.\n\
|
||||||
|
when computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\"\
|
||||||
|
\ operations \nto infer the blocks for computing coverage.\nFor BED12 files,\
|
||||||
|
\ this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns\
|
||||||
|
\ 10,11,12).\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--ignore_deletion"
|
||||||
|
alternatives:
|
||||||
|
- "-ignoreD"
|
||||||
|
description: "Ignore local deletions (CIGAR \"D\" operations) in BAM entries\n\
|
||||||
|
when computing coverage.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "string"
|
||||||
|
name: "--strand"
|
||||||
|
description: "Calculate coverage of intervals from a specific strand.\nWith BED\
|
||||||
|
\ files, requires at least 6 columns (strand is column 6). \n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "+"
|
||||||
|
- "-"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--pair_end_coverage"
|
||||||
|
alternatives:
|
||||||
|
- "-pc"
|
||||||
|
description: "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--fragment_size"
|
||||||
|
alternatives:
|
||||||
|
- "-fs"
|
||||||
|
description: "Force to use provided fragment size instead of read length\nWorks\
|
||||||
|
\ for BAM files only\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--du"
|
||||||
|
description: "Change strand af the mate read (so both reads from the same strand)\
|
||||||
|
\ useful for strand specific\nWorks for BAM files only\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--five_prime"
|
||||||
|
alternatives:
|
||||||
|
- "-5"
|
||||||
|
description: "Calculate coverage of 5\" positions (instead of entire interval).\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--three_prime"
|
||||||
|
alternatives:
|
||||||
|
- "-3"
|
||||||
|
description: "Calculate coverage of 3\" positions (instead of entire interval).\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--max"
|
||||||
|
description: "Combine all positions with a depth >= max into\na single bin in\
|
||||||
|
\ the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
min: 0
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "double"
|
||||||
|
name: "--scale"
|
||||||
|
description: "Scale the coverage by a constant factor.\nEach coverage value is\
|
||||||
|
\ multiplied by this factor before being reported.\nUseful for normalizing coverage\
|
||||||
|
\ by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
min: 0.0
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--trackline"
|
||||||
|
description: "Adds a UCSC/Genome-Browser track line definition in the first line\
|
||||||
|
\ of the output.\n- See here for more details about track line definition:\n\
|
||||||
|
\ http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding\
|
||||||
|
\ a trackline definition, the output BedGraph can be easily\n uploaded\
|
||||||
|
\ to the Genome Browser as a custom track,\n BUT CAN NOT be converted into\
|
||||||
|
\ a BigWig file (w/o removing the first line).\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "string"
|
||||||
|
name: "--trackopts"
|
||||||
|
description: "Writes additional track line definition parameters in the first\
|
||||||
|
\ line.\n- Example:\n -trackopts 'name=\"My Track\" visibility=2 color=255,30,30'\n\
|
||||||
|
\ Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "script.sh"
|
||||||
|
is_executable: true
|
||||||
|
description: "Compute the coverage of a feature file among a genome.\n"
|
||||||
|
test_resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "test.sh"
|
||||||
|
is_executable: true
|
||||||
|
- type: "file"
|
||||||
|
path: "test_data"
|
||||||
|
info: null
|
||||||
|
status: "enabled"
|
||||||
|
requirements:
|
||||||
|
commands:
|
||||||
|
- "ps"
|
||||||
|
keywords:
|
||||||
|
- "genome coverage"
|
||||||
|
- "BED"
|
||||||
|
- "GFF"
|
||||||
|
- "VCF"
|
||||||
|
- "BAM"
|
||||||
|
license: "MIT"
|
||||||
|
references:
|
||||||
|
doi:
|
||||||
|
- "10.1093/bioinformatics/btq033"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/arq5x/bedtools2"
|
||||||
|
homepage: "https://bedtools.readthedocs.io/en/latest/#"
|
||||||
|
documentation: "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html"
|
||||||
|
issue_tracker: "https://github.com/arq5x/bedtools2/issues"
|
||||||
|
runners:
|
||||||
|
- type: "executable"
|
||||||
|
id: "executable"
|
||||||
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||||
|
- type: "nextflow"
|
||||||
|
id: "nextflow"
|
||||||
|
directives:
|
||||||
|
tag: "$id"
|
||||||
|
auto:
|
||||||
|
simplifyInput: true
|
||||||
|
simplifyOutput: false
|
||||||
|
transcript: false
|
||||||
|
publish: false
|
||||||
|
config:
|
||||||
|
labels:
|
||||||
|
mem1gb: "memory = 1000000000.B"
|
||||||
|
mem2gb: "memory = 2000000000.B"
|
||||||
|
mem5gb: "memory = 5000000000.B"
|
||||||
|
mem10gb: "memory = 10000000000.B"
|
||||||
|
mem20gb: "memory = 20000000000.B"
|
||||||
|
mem50gb: "memory = 50000000000.B"
|
||||||
|
mem100gb: "memory = 100000000000.B"
|
||||||
|
mem200gb: "memory = 200000000000.B"
|
||||||
|
mem500gb: "memory = 500000000000.B"
|
||||||
|
mem1tb: "memory = 1000000000000.B"
|
||||||
|
mem2tb: "memory = 2000000000000.B"
|
||||||
|
mem5tb: "memory = 5000000000000.B"
|
||||||
|
mem10tb: "memory = 10000000000000.B"
|
||||||
|
mem20tb: "memory = 20000000000000.B"
|
||||||
|
mem50tb: "memory = 50000000000000.B"
|
||||||
|
mem100tb: "memory = 100000000000000.B"
|
||||||
|
mem200tb: "memory = 200000000000000.B"
|
||||||
|
mem500tb: "memory = 500000000000000.B"
|
||||||
|
mem1gib: "memory = 1073741824.B"
|
||||||
|
mem2gib: "memory = 2147483648.B"
|
||||||
|
mem4gib: "memory = 4294967296.B"
|
||||||
|
mem8gib: "memory = 8589934592.B"
|
||||||
|
mem16gib: "memory = 17179869184.B"
|
||||||
|
mem32gib: "memory = 34359738368.B"
|
||||||
|
mem64gib: "memory = 68719476736.B"
|
||||||
|
mem128gib: "memory = 137438953472.B"
|
||||||
|
mem256gib: "memory = 274877906944.B"
|
||||||
|
mem512gib: "memory = 549755813888.B"
|
||||||
|
mem1tib: "memory = 1099511627776.B"
|
||||||
|
mem2tib: "memory = 2199023255552.B"
|
||||||
|
mem4tib: "memory = 4398046511104.B"
|
||||||
|
mem8tib: "memory = 8796093022208.B"
|
||||||
|
mem16tib: "memory = 17592186044416.B"
|
||||||
|
mem32tib: "memory = 35184372088832.B"
|
||||||
|
mem64tib: "memory = 70368744177664.B"
|
||||||
|
mem128tib: "memory = 140737488355328.B"
|
||||||
|
mem256tib: "memory = 281474976710656.B"
|
||||||
|
mem512tib: "memory = 562949953421312.B"
|
||||||
|
cpu1: "cpus = 1"
|
||||||
|
cpu2: "cpus = 2"
|
||||||
|
cpu5: "cpus = 5"
|
||||||
|
cpu10: "cpus = 10"
|
||||||
|
cpu20: "cpus = 20"
|
||||||
|
cpu50: "cpus = 50"
|
||||||
|
cpu100: "cpus = 100"
|
||||||
|
cpu200: "cpus = 200"
|
||||||
|
cpu500: "cpus = 500"
|
||||||
|
cpu1000: "cpus = 1000"
|
||||||
|
debug: false
|
||||||
|
container: "docker"
|
||||||
|
engines:
|
||||||
|
- type: "docker"
|
||||||
|
id: "docker"
|
||||||
|
image: "debian:stable-slim"
|
||||||
|
target_registry: "images.viash-hub.com"
|
||||||
|
target_tag: "main"
|
||||||
|
namespace_separator: "/"
|
||||||
|
setup:
|
||||||
|
- type: "apt"
|
||||||
|
packages:
|
||||||
|
- "bedtools"
|
||||||
|
- "procps"
|
||||||
|
interactive: false
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "echo \"bedtools: \\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\"\"\
|
||||||
|
\ > /var/software_versions.txt\n"
|
||||||
|
entrypoint: []
|
||||||
|
cmd: null
|
||||||
|
- type: "native"
|
||||||
|
id: "native"
|
||||||
|
build_info:
|
||||||
|
config: "src/bedtools/bedtools_genomecov/config.vsh.yaml"
|
||||||
|
runner: "nextflow"
|
||||||
|
engine: "docker|native"
|
||||||
|
output: "target/nextflow/bedtools/bedtools_genomecov"
|
||||||
|
executable: "target/nextflow/bedtools/bedtools_genomecov/main.nf"
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
|
package_config:
|
||||||
|
name: "biobox"
|
||||||
|
version: "main"
|
||||||
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
|
info: null
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
source: "src"
|
||||||
|
target: "target"
|
||||||
|
config_mods:
|
||||||
|
- ".requirements.commands := ['ps']\n"
|
||||||
|
- ".engines += { type: \"native\" }"
|
||||||
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
// rsem_calculate_expression main
|
// bedtools_genomecov main
|
||||||
//
|
//
|
||||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||||
@@ -8,6 +8,9 @@
|
|||||||
// authors of this component should specify the license in the header of such
|
// authors of this component should specify the license in the header of such
|
||||||
// files, or include a separate license file detailing the licenses of all included
|
// files, or include a separate license file detailing the licenses of all included
|
||||||
// files.
|
// files.
|
||||||
|
//
|
||||||
|
// Component authors:
|
||||||
|
// * Theodoro Gasperin Terra Camargo (author, maintainer)
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// VDSL3 helper functions //
|
// VDSL3 helper functions //
|
||||||
@@ -2804,60 +2807,46 @@ nextflow.enable.dsl=2
|
|||||||
meta = [
|
meta = [
|
||||||
"resources_dir": moduleDir.toRealPath().normalize(),
|
"resources_dir": moduleDir.toRealPath().normalize(),
|
||||||
"config": processConfig(readJsonBlob('''{
|
"config": processConfig(readJsonBlob('''{
|
||||||
"name" : "rsem_calculate_expression",
|
"name" : "bedtools_genomecov",
|
||||||
"namespace" : "rsem",
|
"namespace" : "bedtools",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
|
"authors" : [
|
||||||
|
{
|
||||||
|
"name" : "Theodoro Gasperin Terra Camargo",
|
||||||
|
"roles" : [
|
||||||
|
"author",
|
||||||
|
"maintainer"
|
||||||
|
],
|
||||||
|
"info" : {
|
||||||
|
"links" : {
|
||||||
|
"email" : "theodorogtc@gmail.com",
|
||||||
|
"github" : "tgaspe",
|
||||||
|
"linkedin" : "theodoro-gasperin-terra-camargo"
|
||||||
|
},
|
||||||
|
"organizations" : [
|
||||||
|
{
|
||||||
|
"name" : "Data Intuitive",
|
||||||
|
"href" : "https://www.data-intuitive.com",
|
||||||
|
"role" : "Bioinformatician"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"argument_groups" : [
|
"argument_groups" : [
|
||||||
{
|
{
|
||||||
"name" : "Input",
|
"name" : "Inputs",
|
||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
|
||||||
"type" : "string",
|
|
||||||
"name" : "--id",
|
|
||||||
"description" : "Sample ID.",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "string",
|
|
||||||
"name" : "--strandedness",
|
|
||||||
"description" : "Sample strand-specificity. Must be one of unstranded, forward, reverse",
|
|
||||||
"required" : false,
|
|
||||||
"choices" : [
|
|
||||||
"forward",
|
|
||||||
"reverse",
|
|
||||||
"unstranded"
|
|
||||||
],
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "boolean",
|
|
||||||
"name" : "--paired",
|
|
||||||
"description" : "Paired-end reads or not?",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--input",
|
"name" : "--input",
|
||||||
"description" : "Input reads for quantification.",
|
"alternatives" : [
|
||||||
"must_exist" : true,
|
"-i"
|
||||||
"create_parent" : true,
|
],
|
||||||
"required" : false,
|
"description" : "The input file (BED/GFF/VCF) to be used.\n",
|
||||||
"direction" : "input",
|
"example" : [
|
||||||
"multiple" : true,
|
"input.bed"
|
||||||
"multiple_sep" : ";"
|
],
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"name" : "--index",
|
|
||||||
"description" : "RSEM index.",
|
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
@@ -2866,9 +2855,31 @@ meta = [
|
|||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "file",
|
||||||
"name" : "--extra_args",
|
"name" : "--input_bam",
|
||||||
"description" : "Extra rsem-calculate-expression arguments in addition to the defaults.",
|
"alternatives" : [
|
||||||
|
"-ibam"
|
||||||
|
],
|
||||||
|
"description" : "The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n'--genome' option is ignored if you use '--input_bam' option!\n",
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--genome",
|
||||||
|
"alternatives" : [
|
||||||
|
"-g"
|
||||||
|
],
|
||||||
|
"description" : "The genome file to be used.\n",
|
||||||
|
"example" : [
|
||||||
|
"genome.txt"
|
||||||
|
],
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
@@ -2877,104 +2888,166 @@ meta = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name" : "Output",
|
"name" : "Outputs",
|
||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--counts_gene",
|
"name" : "--output",
|
||||||
"description" : "Expression counts on gene level",
|
"description" : "The output BED file. \n",
|
||||||
"example" : [
|
"example" : [
|
||||||
"sample.genes.results"
|
"output.bed"
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name" : "Options",
|
||||||
|
"arguments" : [
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--depth",
|
||||||
|
"alternatives" : [
|
||||||
|
"-d"
|
||||||
|
],
|
||||||
|
"description" : "Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--depth_zero",
|
||||||
|
"alternatives" : [
|
||||||
|
"-dz"
|
||||||
|
],
|
||||||
|
"description" : "Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--bed_graph",
|
||||||
|
"alternatives" : [
|
||||||
|
"-bg"
|
||||||
|
],
|
||||||
|
"description" : "Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--bed_graph_zero_coverage",
|
||||||
|
"alternatives" : [
|
||||||
|
"-bga"
|
||||||
|
],
|
||||||
|
"description" : "Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \\"grep -w 0$\\" to the output.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--split",
|
||||||
|
"description" : "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--ignore_deletion",
|
||||||
|
"alternatives" : [
|
||||||
|
"-ignoreD"
|
||||||
|
],
|
||||||
|
"description" : "Ignore local deletions (CIGAR \\"D\\" operations) in BAM entries\nwhen computing coverage.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--strand",
|
||||||
|
"description" : "Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n",
|
||||||
|
"required" : false,
|
||||||
|
"choices" : [
|
||||||
|
"+",
|
||||||
|
"-"
|
||||||
|
],
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "boolean_true",
|
||||||
"name" : "--counts_transcripts",
|
"name" : "--pair_end_coverage",
|
||||||
"description" : "Expression counts on transcript level",
|
"alternatives" : [
|
||||||
"example" : [
|
"-pc"
|
||||||
"sample.isoforms.results"
|
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"description" : "Calculate coverage of pair-end fragments.\nWorks for BAM files only\n",
|
||||||
"create_parent" : true,
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--fragment_size",
|
||||||
|
"alternatives" : [
|
||||||
|
"-fs"
|
||||||
|
],
|
||||||
|
"description" : "Force to use provided fragment size instead of read length\nWorks for BAM files only\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--du",
|
||||||
|
"description" : "Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--five_prime",
|
||||||
|
"alternatives" : [
|
||||||
|
"-5"
|
||||||
|
],
|
||||||
|
"description" : "Calculate coverage of 5\\" positions (instead of entire interval).\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--three_prime",
|
||||||
|
"alternatives" : [
|
||||||
|
"-3"
|
||||||
|
],
|
||||||
|
"description" : "Calculate coverage of 3\\" positions (instead of entire interval).\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--max",
|
||||||
|
"description" : "Combine all positions with a depth >= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n",
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"min" : 0,
|
||||||
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "double",
|
||||||
"name" : "--stat",
|
"name" : "--scale",
|
||||||
"description" : "RSEM statistics",
|
"description" : "Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n",
|
||||||
"example" : [
|
|
||||||
"sample.stat"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"min" : 0.0,
|
||||||
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "boolean_true",
|
||||||
"name" : "--logs",
|
"name" : "--trackline",
|
||||||
"description" : "RSEM logs",
|
"description" : "Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n",
|
||||||
"example" : [
|
"direction" : "input"
|
||||||
"sample.log"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "output",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "string",
|
||||||
"name" : "--bam_star",
|
"name" : "--trackopts",
|
||||||
"description" : "BAM file generated by STAR (optional)",
|
"description" : "Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts 'name=\\"My Track\\" visibility=2 color=255,30,30'\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n",
|
||||||
"example" : [
|
|
||||||
"sample.STAR.genome.bam"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : true,
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"name" : "--bam_genome",
|
|
||||||
"description" : "Genome BAM file (optional)",
|
|
||||||
"example" : [
|
|
||||||
"sample.genome.bam"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "output",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"name" : "--bam_transcript",
|
|
||||||
"description" : "Transcript BAM file (optional)",
|
|
||||||
"example" : [
|
|
||||||
"sample.transcript.bam"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "output",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -2987,7 +3060,7 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "Calculate expression with RSEM.\n",
|
"description" : "Compute the coverage of a feature file among a genome.\n",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
@@ -2996,47 +3069,34 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
|
"path" : "test_data"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/minimal_test/reference/rsem.tar.gz"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/rsem/calculateexpression/main.nf",
|
|
||||||
"modules/nf-core/rsem/calculateexpression/meta.yml"
|
|
||||||
],
|
|
||||||
"last_sha" : "92b2a7857de1dda9d1c19a088941fc81e2976ff7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"keywords" : [
|
||||||
{
|
"genome coverage",
|
||||||
"type" : "vsh",
|
"BED",
|
||||||
"name" : "biobox",
|
"GFF",
|
||||||
"repo" : "vsh/biobox",
|
"VCF",
|
||||||
"tag" : "main"
|
"BAM"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"references" : {
|
||||||
|
"doi" : [
|
||||||
|
"10.1093/bioinformatics/btq033"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/arq5x/bedtools2",
|
||||||
|
"homepage" : "https://bedtools.readthedocs.io/en/latest/#",
|
||||||
|
"documentation" : "https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html",
|
||||||
|
"issue_tracker" : "https://github.com/arq5x/bedtools2/issues"
|
||||||
|
},
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3115,7 +3175,7 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"id" : "docker",
|
"id" : "docker",
|
||||||
"image" : "ubuntu:22.04",
|
"image" : "debian:stable-slim",
|
||||||
"target_registry" : "images.viash-hub.com",
|
"target_registry" : "images.viash-hub.com",
|
||||||
"target_tag" : "main",
|
"target_tag" : "main",
|
||||||
"namespace_separator" : "/",
|
"namespace_separator" : "/",
|
||||||
@@ -3123,31 +3183,15 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "apt",
|
"type" : "apt",
|
||||||
"packages" : [
|
"packages" : [
|
||||||
"build-essential",
|
"bedtools",
|
||||||
"gcc",
|
"procps"
|
||||||
"g++",
|
|
||||||
"make",
|
|
||||||
"wget",
|
|
||||||
"zlib1g-dev",
|
|
||||||
"unzip",
|
|
||||||
"xxd",
|
|
||||||
"perl",
|
|
||||||
"r-base",
|
|
||||||
"bowtie2",
|
|
||||||
"python3-pip",
|
|
||||||
"git"
|
|
||||||
],
|
],
|
||||||
"interactive" : false
|
"interactive" : false
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"run" : [
|
"run" : [
|
||||||
"ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \\\\\nunzip ${STAR_VERSION}.zip && \\\\\ncd STAR-${STAR_VERSION}/source && \\\\\nmake STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\\\ncp STAR /usr/local/bin && \\\\\ncd /tmp && \\\\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \\\\\nunzip v${RSEM_VERSION}.zip && \\\\\ncd RSEM-${RSEM_VERSION} && \\\\\nmake && \\\\\nmake install && \\\\\nrm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \\\\\nrm -rf /tmp/RSEM-${RSEM_VERSION} /tmp/v${RSEM_VERSION}.zip && \\\\\ncd && \\\\\napt-get clean && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> /etc/profile && \\\\\necho 'export PATH=$PATH:/usr/local/bin' >> ~/.bashrc && \\\\\n/bin/bash -c \\"source /etc/profile && source ~/.bashrc && echo $PATH && which STAR\\"\n"
|
"echo \\"bedtools: \\\\\\"$(bedtools --version | sed -n 's/^bedtools //p')\\\\\\"\\" > /var/software_versions.txt\n"
|
||||||
],
|
|
||||||
"env" : [
|
|
||||||
"STAR_VERSION=2.7.11b",
|
|
||||||
"RSEM_VERSION=1.3.3",
|
|
||||||
"TZ=Europe/Brussels"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -3158,49 +3202,39 @@ meta = [
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"build_info" : {
|
"build_info" : {
|
||||||
"config" : "/workdir/root/repo/src/rsem/rsem_calculate_expression/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/bedtools/bedtools_genomecov/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/rsem/rsem_calculate_expression",
|
"output" : "target/nextflow/bedtools/bedtools_genomecov",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3214,21 +3248,30 @@ def innerWorkflowFactory(args) {
|
|||||||
def rawScript = '''set -e
|
def rawScript = '''set -e
|
||||||
tempscript=".viash_script.sh"
|
tempscript=".viash_script.sh"
|
||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
$( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "${VIASH_PAR_ID}" | sed "s#'#'\\"'\\"'#g;s#.*#par_id='&'#" ; else echo "# par_id="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT_BAM+x} ]; then echo "${VIASH_PAR_INPUT_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_bam='&'#" ; else echo "# par_input_bam="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi )
|
$( if [ ! -z ${VIASH_PAR_GENOME+x} ]; then echo "${VIASH_PAR_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_genome='&'#" ; else echo "# par_genome="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_COUNTS_GENE+x} ]; then echo "${VIASH_PAR_COUNTS_GENE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_gene='&'#" ; else echo "# par_counts_gene="; fi )
|
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_COUNTS_TRANSCRIPTS+x} ]; then echo "${VIASH_PAR_COUNTS_TRANSCRIPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts_transcripts='&'#" ; else echo "# par_counts_transcripts="; fi )
|
$( if [ ! -z ${VIASH_PAR_DEPTH+x} ]; then echo "${VIASH_PAR_DEPTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth='&'#" ; else echo "# par_depth="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_STAT+x} ]; then echo "${VIASH_PAR_STAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_stat='&'#" ; else echo "# par_stat="; fi )
|
$( if [ ! -z ${VIASH_PAR_DEPTH_ZERO+x} ]; then echo "${VIASH_PAR_DEPTH_ZERO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_depth_zero='&'#" ; else echo "# par_depth_zero="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_LOGS+x} ]; then echo "${VIASH_PAR_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_logs='&'#" ; else echo "# par_logs="; fi )
|
$( if [ ! -z ${VIASH_PAR_BED_GRAPH+x} ]; then echo "${VIASH_PAR_BED_GRAPH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph='&'#" ; else echo "# par_bed_graph="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_BAM_STAR+x} ]; then echo "${VIASH_PAR_BAM_STAR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_star='&'#" ; else echo "# par_bam_star="; fi )
|
$( if [ ! -z ${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE+x} ]; then echo "${VIASH_PAR_BED_GRAPH_ZERO_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bed_graph_zero_coverage='&'#" ; else echo "# par_bed_graph_zero_coverage="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_BAM_GENOME+x} ]; then echo "${VIASH_PAR_BAM_GENOME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_genome='&'#" ; else echo "# par_bam_genome="; fi )
|
$( if [ ! -z ${VIASH_PAR_SPLIT+x} ]; then echo "${VIASH_PAR_SPLIT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_split='&'#" ; else echo "# par_split="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_BAM_TRANSCRIPT+x} ]; then echo "${VIASH_PAR_BAM_TRANSCRIPT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam_transcript='&'#" ; else echo "# par_bam_transcript="; fi )
|
$( if [ ! -z ${VIASH_PAR_IGNORE_DELETION+x} ]; then echo "${VIASH_PAR_IGNORE_DELETION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ignore_deletion='&'#" ; else echo "# par_ignore_deletion="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_STRAND+x} ]; then echo "${VIASH_PAR_STRAND}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strand='&'#" ; else echo "# par_strand="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_PAIR_END_COVERAGE+x} ]; then echo "${VIASH_PAR_PAIR_END_COVERAGE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pair_end_coverage='&'#" ; else echo "# par_pair_end_coverage="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_FRAGMENT_SIZE+x} ]; then echo "${VIASH_PAR_FRAGMENT_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_size='&'#" ; else echo "# par_fragment_size="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_DU+x} ]; then echo "${VIASH_PAR_DU}" | sed "s#'#'\\"'\\"'#g;s#.*#par_du='&'#" ; else echo "# par_du="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_FIVE_PRIME+x} ]; then echo "${VIASH_PAR_FIVE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_five_prime='&'#" ; else echo "# par_five_prime="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_THREE_PRIME+x} ]; then echo "${VIASH_PAR_THREE_PRIME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_three_prime='&'#" ; else echo "# par_three_prime="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MAX+x} ]; then echo "${VIASH_PAR_MAX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_max='&'#" ; else echo "# par_max="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_SCALE+x} ]; then echo "${VIASH_PAR_SCALE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_scale='&'#" ; else echo "# par_scale="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_TRACKLINE+x} ]; then echo "${VIASH_PAR_TRACKLINE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackline='&'#" ; else echo "# par_trackline="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_TRACKOPTS+x} ]; then echo "${VIASH_PAR_TRACKOPTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_trackopts='&'#" ; else echo "# par_trackopts="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
||||||
@@ -3249,47 +3292,57 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
|
|||||||
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
||||||
|
|
||||||
## VIASH END
|
## VIASH END
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
|
# Exit on error
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
function clean_up {
|
# Unset variables
|
||||||
rm -rf "\\$tmpdir"
|
unset_if_false=(
|
||||||
}
|
par_input_bam
|
||||||
trap clean_up EXIT
|
par_depth
|
||||||
|
par_depth_zero
|
||||||
|
par_bed_graph
|
||||||
|
par_bed_graph_zero_coverage
|
||||||
|
par_split
|
||||||
|
par_ignore_deletion
|
||||||
|
par_pair_end_coverage
|
||||||
|
par_fragment_size
|
||||||
|
par_du
|
||||||
|
par_five_prime
|
||||||
|
par_three_prime
|
||||||
|
par_trackline
|
||||||
|
)
|
||||||
|
|
||||||
tmpdir=\\$(mktemp -d "\\$meta_temp_dir/\\$meta_functionality_name-XXXXXXXX")
|
for par in \\${unset_if_false[@]}; do
|
||||||
|
test_val="\\${!par}"
|
||||||
|
[[ "\\$test_val" == "false" ]] && unset \\$par
|
||||||
|
done
|
||||||
|
|
||||||
[[ "\\$par_paired" == "false" ]] && unset par_paired
|
# Create input array
|
||||||
|
IFS=";" read -ra trackopts <<< \\$par_trackopts
|
||||||
|
|
||||||
if [ \\$par_strandedness == 'forward' ]; then
|
bedtools genomecov \\\\
|
||||||
strandedness='--strandedness forward'
|
\\${par_depth:+-d} \\\\
|
||||||
elif [ \\$par_strandedness == 'reverse' ]; then
|
\\${par_depth_zero:+-dz} \\\\
|
||||||
strandedness='--strandedness reverse'
|
\\${par_bed_graph:+-bg} \\\\
|
||||||
else
|
\\${par_bed_graph_zero_coverage:+-bga} \\\\
|
||||||
strandedness=''
|
\\${par_split:+-split} \\\\
|
||||||
fi
|
\\${par_ignore_deletion:+-ignoreD} \\\\
|
||||||
|
\\${par_du:+-du} \\\\
|
||||||
IFS=";" read -ra input <<< \\$par_input
|
\\${par_five_prime:+-5} \\\\
|
||||||
|
\\${par_three_prime:+-3} \\\\
|
||||||
INDEX=\\`find -L \\$par_index/ -name "*.grp" | sed 's/\\\\.grp\\$//'\\`
|
\\${par_trackline:+-trackline} \\\\
|
||||||
|
\\${par_strand:+-strand "\\$par_strand"} \\\\
|
||||||
rsem-calculate-expression \\\\
|
\\${par_max:+-max "\\$par_max"} \\\\
|
||||||
\\${meta_cpus:+--num-threads \\$meta_cpus} \\\\
|
\\${par_scale:+-scale "\\$par_scale"} \\\\
|
||||||
\\$strandedness \\\\
|
\\${par_trackopts:+-trackopts "\\${trackopts[*]}"} \\\\
|
||||||
\\${par_paired:+--paired-end} \\\\
|
\\${par_input_bam:+-ibam "\\$par_input_bam"} \\\\
|
||||||
\\$par_extra_args \\\\
|
\\${par_input:+-i "\\$par_input"} \\\\
|
||||||
\\${input[*]} \\\\
|
\\${par_genome:+-g "\\$par_genome"} \\\\
|
||||||
\\$INDEX \\\\
|
\\${par_pair_end_coverage:+-pc} \\\\
|
||||||
\\$par_id
|
\\${par_fragment_size:+-fs} \\\\
|
||||||
|
> "\\$par_output"
|
||||||
|
|
||||||
[[ -e "\\${par_id}.genes.results" ]] && mv "\\${par_id}.genes.results" \\$par_counts_gene
|
|
||||||
[[ -e "\\${par_id}id.isoforms.results" ]] && mv "\\${par_id}id.isoforms.results" \\$par_counts_transcripts
|
|
||||||
[[ -e "\\${par_id}.stat" ]] && mv "\\${par_id}.stat" \\$par_stat
|
|
||||||
# [[ -e "\\${par_id}.log" ]] && mv "\\${par_id}.log" \\$par_logs
|
|
||||||
[[ -e "\\${par_id}.STAR.genome.bam" ]] && mv "\\${par_id}.STAR.genome.bam" \\$par_bam_star
|
|
||||||
[[ -e "\\${par_id}.genome.bam" ]] && mv "\\${par_id}.genome.bam" \\$par_bam_genome
|
|
||||||
[[ -e "\\${par_id}.transcript.bam" ]] && mv "\\${par_id}.transcript.bam" \\$par_bam_transcript
|
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3650,7 +3703,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/rsem/rsem_calculate_expression",
|
"image" : "vsh/biobox/bedtools/bedtools_genomecov",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
manifest {
|
||||||
|
name = 'bedtools/bedtools_genomecov'
|
||||||
|
mainScript = 'main.nf'
|
||||||
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
|
version = 'main'
|
||||||
|
description = 'Compute the coverage of a feature file among a genome.\n'
|
||||||
|
author = 'Theodoro Gasperin Terra Camargo'
|
||||||
|
}
|
||||||
|
|
||||||
|
process.container = 'nextflow/bash:latest'
|
||||||
|
|
||||||
|
// detect tempdir
|
||||||
|
tempDir = java.nio.file.Paths.get(
|
||||||
|
System.getenv('NXF_TEMP') ?:
|
||||||
|
System.getenv('VIASH_TEMP') ?:
|
||||||
|
System.getenv('TEMPDIR') ?:
|
||||||
|
System.getenv('TMPDIR') ?:
|
||||||
|
'/tmp'
|
||||||
|
).toAbsolutePath()
|
||||||
|
|
||||||
|
profiles {
|
||||||
|
no_publish {
|
||||||
|
process {
|
||||||
|
withName: '.*' {
|
||||||
|
publishDir = [
|
||||||
|
enabled: false
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mount_temp {
|
||||||
|
docker.temp = tempDir
|
||||||
|
podman.temp = tempDir
|
||||||
|
charliecloud.temp = tempDir
|
||||||
|
}
|
||||||
|
docker {
|
||||||
|
docker.enabled = true
|
||||||
|
// docker.userEmulation = true
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
singularity {
|
||||||
|
singularity.enabled = true
|
||||||
|
singularity.autoMounts = true
|
||||||
|
docker.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
podman {
|
||||||
|
podman.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
shifter {
|
||||||
|
shifter.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
charliecloud {
|
||||||
|
charliecloud.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process{
|
||||||
|
withLabel: mem1gb { memory = 1000000000.B }
|
||||||
|
withLabel: mem2gb { memory = 2000000000.B }
|
||||||
|
withLabel: mem5gb { memory = 5000000000.B }
|
||||||
|
withLabel: mem10gb { memory = 10000000000.B }
|
||||||
|
withLabel: mem20gb { memory = 20000000000.B }
|
||||||
|
withLabel: mem50gb { memory = 50000000000.B }
|
||||||
|
withLabel: mem100gb { memory = 100000000000.B }
|
||||||
|
withLabel: mem200gb { memory = 200000000000.B }
|
||||||
|
withLabel: mem500gb { memory = 500000000000.B }
|
||||||
|
withLabel: mem1tb { memory = 1000000000000.B }
|
||||||
|
withLabel: mem2tb { memory = 2000000000000.B }
|
||||||
|
withLabel: mem5tb { memory = 5000000000000.B }
|
||||||
|
withLabel: mem10tb { memory = 10000000000000.B }
|
||||||
|
withLabel: mem20tb { memory = 20000000000000.B }
|
||||||
|
withLabel: mem50tb { memory = 50000000000000.B }
|
||||||
|
withLabel: mem100tb { memory = 100000000000000.B }
|
||||||
|
withLabel: mem200tb { memory = 200000000000000.B }
|
||||||
|
withLabel: mem500tb { memory = 500000000000000.B }
|
||||||
|
withLabel: mem1gib { memory = 1073741824.B }
|
||||||
|
withLabel: mem2gib { memory = 2147483648.B }
|
||||||
|
withLabel: mem4gib { memory = 4294967296.B }
|
||||||
|
withLabel: mem8gib { memory = 8589934592.B }
|
||||||
|
withLabel: mem16gib { memory = 17179869184.B }
|
||||||
|
withLabel: mem32gib { memory = 34359738368.B }
|
||||||
|
withLabel: mem64gib { memory = 68719476736.B }
|
||||||
|
withLabel: mem128gib { memory = 137438953472.B }
|
||||||
|
withLabel: mem256gib { memory = 274877906944.B }
|
||||||
|
withLabel: mem512gib { memory = 549755813888.B }
|
||||||
|
withLabel: mem1tib { memory = 1099511627776.B }
|
||||||
|
withLabel: mem2tib { memory = 2199023255552.B }
|
||||||
|
withLabel: mem4tib { memory = 4398046511104.B }
|
||||||
|
withLabel: mem8tib { memory = 8796093022208.B }
|
||||||
|
withLabel: mem16tib { memory = 17592186044416.B }
|
||||||
|
withLabel: mem32tib { memory = 35184372088832.B }
|
||||||
|
withLabel: mem64tib { memory = 70368744177664.B }
|
||||||
|
withLabel: mem128tib { memory = 140737488355328.B }
|
||||||
|
withLabel: mem256tib { memory = 281474976710656.B }
|
||||||
|
withLabel: mem512tib { memory = 562949953421312.B }
|
||||||
|
withLabel: cpu1 { cpus = 1 }
|
||||||
|
withLabel: cpu2 { cpus = 2 }
|
||||||
|
withLabel: cpu5 { cpus = 5 }
|
||||||
|
withLabel: cpu10 { cpus = 10 }
|
||||||
|
withLabel: cpu20 { cpus = 20 }
|
||||||
|
withLabel: cpu50 { cpus = 50 }
|
||||||
|
withLabel: cpu100 { cpus = 100 }
|
||||||
|
withLabel: cpu200 { cpus = 200 }
|
||||||
|
withLabel: cpu500 { cpus = 500 }
|
||||||
|
withLabel: cpu1000 { cpus = 1000 }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,303 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
|
"title": "bedtools_genomecov",
|
||||||
|
"description": "Compute the coverage of a feature file among a genome.\n",
|
||||||
|
"type": "object",
|
||||||
|
"definitions": {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"inputs" : {
|
||||||
|
"title": "Inputs",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"input": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used",
|
||||||
|
"help_text": "Type: `file`, example: `input.bed`. The input file (BED/GFF/VCF) to be used.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"input_bam": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. The input file is in BAM format",
|
||||||
|
"help_text": "Type: `file`. The input file is in BAM format.\nNote: BAM _must_ be sorted by positions.\n\u0027--genome\u0027 option is ignored if you use \u0027--input_bam\u0027 option!\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"genome": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, example: `genome.txt`. The genome file to be used",
|
||||||
|
"help_text": "Type: `file`, example: `genome.txt`. The genome file to be used.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"outputs" : {
|
||||||
|
"title": "Outputs",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"output": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file",
|
||||||
|
"help_text": "Type: `file`, required, default: `$id.$key.output.bed`, example: `output.bed`. The output BED file. \n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.output.bed"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"options" : {
|
||||||
|
"title": "Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"depth": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with one-based coordinates).\nDefault behavior is to report a histogram.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"depth_zero": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Report the depth at each genome position (with zero-based coordinates).\nReports only non-zero positions.\nDefault behavior is to report a histogram.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bed_graph": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format. For details, see:\ngenome.ucsc.edu/goldenPath/help/bedgraph.html\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bed_graph_zero_coverage": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Report depth in BedGraph format, as above (-bg).\nHowever with this option, regions with zero \ncoverage are also reported. This allows one to\nquickly extract all regions of a genome with 0 \ncoverage by applying: \"grep -w 0$\" to the output.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"split": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Treat \"split\" BAM or BED12 entries as distinct BED intervals.\nwhen computing coverage.\nFor BAM files, this uses the CIGAR \"N\" and \"D\" operations \nto infer the blocks for computing coverage.\nFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds\nfields (i.e., columns 10,11,12).\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ignore_deletion": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Ignore local deletions (CIGAR \"D\" operations) in BAM entries\nwhen computing coverage.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"strand": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand",
|
||||||
|
"help_text": "Type: `string`, choices: ``+`, `-``. Calculate coverage of intervals from a specific strand.\nWith BED files, requires at least 6 columns (strand is column 6). \n",
|
||||||
|
"enum": ["+", "-"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"pair_end_coverage": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of pair-end fragments.\nWorks for BAM files only\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fragment_size": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Force to use provided fragment size instead of read length\nWorks for BAM files only\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"du": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Change strand af the mate read (so both reads from the same strand) useful for strand specific\nWorks for BAM files only\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"five_prime": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 5\" positions (instead of entire interval).\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"three_prime": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Calculate coverage of 3\" positions (instead of entire interval).\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"max": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram",
|
||||||
|
"help_text": "Type: `integer`. Combine all positions with a depth \u003e= max into\na single bin in the histogram. Irrelevant\nfor -d and -bedGraph\n- (INTEGER)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"scale": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`. Scale the coverage by a constant factor",
|
||||||
|
"help_text": "Type: `double`. Scale the coverage by a constant factor.\nEach coverage value is multiplied by this factor before being reported.\nUseful for normalizing coverage by, e.g., reads per million (RPM).\n- Default is 1.0; i.e., unscaled.\n- (FLOAT)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"trackline": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Adds a UCSC/Genome-Browser track line definition in the first line of the output.\n- See here for more details about track line definition:\n http://genome.ucsc.edu/goldenPath/help/bedgraph.html\n- NOTE: When adding a trackline definition, the output BedGraph can be easily\n uploaded to the Genome Browser as a custom track,\n BUT CAN NOT be converted into a BigWig file (w/o removing the first line).\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"trackopts": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line",
|
||||||
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Writes additional track line definition parameters in the first line.\n- Example:\n -trackopts \u0027name=\"My Track\" visibility=2 color=255,30,30\u0027\n Note the use of single-quotes if you have spaces in your parameters.\n- (TEXT)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"nextflow input-output arguments" : {
|
||||||
|
"title": "Nextflow input-output arguments",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"publish_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||||
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"param_list": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||||
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||||
|
"hidden": true
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/inputs"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/outputs"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -0,0 +1,367 @@
|
|||||||
|
name: "fastqc"
|
||||||
|
version: "main"
|
||||||
|
authors:
|
||||||
|
- name: "Theodoro Gasperin Terra Camargo"
|
||||||
|
roles:
|
||||||
|
- "author"
|
||||||
|
- "maintainer"
|
||||||
|
info:
|
||||||
|
links:
|
||||||
|
email: "theodorogtc@gmail.com"
|
||||||
|
github: "tgaspe"
|
||||||
|
linkedin: "theodoro-gasperin-terra-camargo"
|
||||||
|
organizations:
|
||||||
|
- name: "Data Intuitive"
|
||||||
|
href: "https://www.data-intuitive.com"
|
||||||
|
role: "Bioinformatician"
|
||||||
|
argument_groups:
|
||||||
|
- name: "Inputs"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--input"
|
||||||
|
description: "FASTQ file(s) to be analyzed.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "input.fq"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: true
|
||||||
|
direction: "input"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- name: "Outputs"
|
||||||
|
description: "At least one of the output options (--html, --zip, --summary, --data)\
|
||||||
|
\ must be used.\n"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--html"
|
||||||
|
description: "Create the HTML report of the results. \n'*' wild card must be provided\
|
||||||
|
\ in the output file name. \nWild card will be replaced by the input file basename.\n\
|
||||||
|
e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\
|
||||||
|
\ html file named sample_1.html\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "*.html"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--zip"
|
||||||
|
description: "Create the zip file(s) containing: html report, data, images, icons,\
|
||||||
|
\ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
|
||||||
|
\ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\
|
||||||
|
\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "*.zip"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--summary"
|
||||||
|
description: "Create the summary file(s).\n'*' wild card must be provided in the\
|
||||||
|
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
|
||||||
|
\ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\
|
||||||
|
\ an output summary.txt file named sample_1_summary.txt\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "*_summary.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--data"
|
||||||
|
description: "Create the data file(s).\n'*' wild card must be provided in the\
|
||||||
|
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
|
||||||
|
\ \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an\
|
||||||
|
\ output data.txt file named sample_1_data.txt\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "*_data.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- name: "Options"
|
||||||
|
arguments:
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--casava"
|
||||||
|
description: "Files come from raw casava output. Files in the same sample\ngroup\
|
||||||
|
\ (differing only by the group number) will be analysed\nas a set rather than\
|
||||||
|
\ individually. Sequences with the filter\nflag set in the header will be excluded\
|
||||||
|
\ from the analysis.\nFiles must have the same names given to them by casava\n\
|
||||||
|
(including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
|
||||||
|
\ together correctly.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--nano"
|
||||||
|
description: "Files come from nanopore sequences and are in fast5 format. In\n\
|
||||||
|
this mode you can pass in directories to process and the program\nwill take\
|
||||||
|
\ in all fast5 files within those directories and produce\na single output file\
|
||||||
|
\ from the sequences found in all files.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--nofilter"
|
||||||
|
description: "If running with --casava then don't remove read flagged by\ncasava\
|
||||||
|
\ as poor quality when performing the QC analysis.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--nogroup"
|
||||||
|
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
|
||||||
|
\ data for every base in the read. \nWARNING: Using this option will cause fastqc\
|
||||||
|
\ to crash \nand burn if you use it on really long reads, and your \nplots may\
|
||||||
|
\ end up a ridiculous size. You have been warned!\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--min_length"
|
||||||
|
description: "Sets an artificial lower limit on the length of the \nsequence to\
|
||||||
|
\ be shown in the report. As long as you \nset this to a value greater or equal\
|
||||||
|
\ to your longest \nread length then this will be the sequence length used \n\
|
||||||
|
to create your read groups. This can be useful for making\ndirectly comparable\
|
||||||
|
\ statistics from datasets with somewhat \nvariable read lengths.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--format"
|
||||||
|
alternatives:
|
||||||
|
- "-f"
|
||||||
|
description: "Bypasses the normal sequence file format detection and \nforces\
|
||||||
|
\ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
|
||||||
|
\ sam_mapped, and fastq.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "bam"
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--contaminants"
|
||||||
|
alternatives:
|
||||||
|
- "-c"
|
||||||
|
description: "Specifies a non-default file which contains the list \nof contaminants\
|
||||||
|
\ to screen overrepresented sequences against. \nThe file must contain sets\
|
||||||
|
\ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
|
||||||
|
\ a hash will be ignored.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "contaminants.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--adapters"
|
||||||
|
alternatives:
|
||||||
|
- "-a"
|
||||||
|
description: "Specifies a non-default file which contains the list of \nadapter\
|
||||||
|
\ sequences which will be explicitly searched against \nthe library. The file\
|
||||||
|
\ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
|
||||||
|
\ prefixed with a hash will be ignored.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "adapters.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--limits"
|
||||||
|
alternatives:
|
||||||
|
- "-l"
|
||||||
|
description: "Specifies a non-default file which contains \na set of criteria\
|
||||||
|
\ which will be used to determine \nthe warn/error limits for the various modules.\
|
||||||
|
\ \nThis file can also be used to selectively remove \nsome modules from the\
|
||||||
|
\ output altogether. The format \nneeds to mirror the default limits.txt file\
|
||||||
|
\ found in \nthe Configuration folder.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "limits.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--kmers"
|
||||||
|
alternatives:
|
||||||
|
- "-k"
|
||||||
|
description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
|
||||||
|
\ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
|
||||||
|
\ specified.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 7
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--quiet"
|
||||||
|
alternatives:
|
||||||
|
- "-q"
|
||||||
|
description: "Suppress all progress messages on stdout and only report errors.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "script.sh"
|
||||||
|
is_executable: true
|
||||||
|
description: "FastQC - A high throughput sequence QC analysis tool."
|
||||||
|
test_resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "test.sh"
|
||||||
|
is_executable: true
|
||||||
|
info: null
|
||||||
|
status: "enabled"
|
||||||
|
requirements:
|
||||||
|
commands:
|
||||||
|
- "ps"
|
||||||
|
keywords:
|
||||||
|
- "Quality control"
|
||||||
|
- "BAM"
|
||||||
|
- "SAM"
|
||||||
|
- "FASTQ"
|
||||||
|
license: "GPL-3.0, Apache-2.0"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/s-andrews/FastQC"
|
||||||
|
homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
|
||||||
|
documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
|
||||||
|
issue_tracker: "https://github.com/s-andrews/FastQC/issues"
|
||||||
|
runners:
|
||||||
|
- type: "executable"
|
||||||
|
id: "executable"
|
||||||
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||||
|
- type: "nextflow"
|
||||||
|
id: "nextflow"
|
||||||
|
directives:
|
||||||
|
tag: "$id"
|
||||||
|
auto:
|
||||||
|
simplifyInput: true
|
||||||
|
simplifyOutput: false
|
||||||
|
transcript: false
|
||||||
|
publish: false
|
||||||
|
config:
|
||||||
|
labels:
|
||||||
|
mem1gb: "memory = 1000000000.B"
|
||||||
|
mem2gb: "memory = 2000000000.B"
|
||||||
|
mem5gb: "memory = 5000000000.B"
|
||||||
|
mem10gb: "memory = 10000000000.B"
|
||||||
|
mem20gb: "memory = 20000000000.B"
|
||||||
|
mem50gb: "memory = 50000000000.B"
|
||||||
|
mem100gb: "memory = 100000000000.B"
|
||||||
|
mem200gb: "memory = 200000000000.B"
|
||||||
|
mem500gb: "memory = 500000000000.B"
|
||||||
|
mem1tb: "memory = 1000000000000.B"
|
||||||
|
mem2tb: "memory = 2000000000000.B"
|
||||||
|
mem5tb: "memory = 5000000000000.B"
|
||||||
|
mem10tb: "memory = 10000000000000.B"
|
||||||
|
mem20tb: "memory = 20000000000000.B"
|
||||||
|
mem50tb: "memory = 50000000000000.B"
|
||||||
|
mem100tb: "memory = 100000000000000.B"
|
||||||
|
mem200tb: "memory = 200000000000000.B"
|
||||||
|
mem500tb: "memory = 500000000000000.B"
|
||||||
|
mem1gib: "memory = 1073741824.B"
|
||||||
|
mem2gib: "memory = 2147483648.B"
|
||||||
|
mem4gib: "memory = 4294967296.B"
|
||||||
|
mem8gib: "memory = 8589934592.B"
|
||||||
|
mem16gib: "memory = 17179869184.B"
|
||||||
|
mem32gib: "memory = 34359738368.B"
|
||||||
|
mem64gib: "memory = 68719476736.B"
|
||||||
|
mem128gib: "memory = 137438953472.B"
|
||||||
|
mem256gib: "memory = 274877906944.B"
|
||||||
|
mem512gib: "memory = 549755813888.B"
|
||||||
|
mem1tib: "memory = 1099511627776.B"
|
||||||
|
mem2tib: "memory = 2199023255552.B"
|
||||||
|
mem4tib: "memory = 4398046511104.B"
|
||||||
|
mem8tib: "memory = 8796093022208.B"
|
||||||
|
mem16tib: "memory = 17592186044416.B"
|
||||||
|
mem32tib: "memory = 35184372088832.B"
|
||||||
|
mem64tib: "memory = 70368744177664.B"
|
||||||
|
mem128tib: "memory = 140737488355328.B"
|
||||||
|
mem256tib: "memory = 281474976710656.B"
|
||||||
|
mem512tib: "memory = 562949953421312.B"
|
||||||
|
cpu1: "cpus = 1"
|
||||||
|
cpu2: "cpus = 2"
|
||||||
|
cpu5: "cpus = 5"
|
||||||
|
cpu10: "cpus = 10"
|
||||||
|
cpu20: "cpus = 20"
|
||||||
|
cpu50: "cpus = 50"
|
||||||
|
cpu100: "cpus = 100"
|
||||||
|
cpu200: "cpus = 200"
|
||||||
|
cpu500: "cpus = 500"
|
||||||
|
cpu1000: "cpus = 1000"
|
||||||
|
debug: false
|
||||||
|
container: "docker"
|
||||||
|
engines:
|
||||||
|
- type: "docker"
|
||||||
|
id: "docker"
|
||||||
|
image: "biocontainers/fastqc:v0.11.9_cv8"
|
||||||
|
target_registry: "images.viash-hub.com"
|
||||||
|
target_tag: "main"
|
||||||
|
namespace_separator: "/"
|
||||||
|
setup:
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
|
||||||
|
entrypoint: []
|
||||||
|
cmd: null
|
||||||
|
- type: "native"
|
||||||
|
id: "native"
|
||||||
|
build_info:
|
||||||
|
config: "src/fastqc/config.vsh.yaml"
|
||||||
|
runner: "nextflow"
|
||||||
|
engine: "docker|native"
|
||||||
|
output: "target/nextflow/fastqc"
|
||||||
|
executable: "target/nextflow/fastqc/main.nf"
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
|
git_remote: "https://github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
|
package_config:
|
||||||
|
name: "biobox"
|
||||||
|
version: "main"
|
||||||
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
|
info: null
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
source: "src"
|
||||||
|
target: "target"
|
||||||
|
config_mods:
|
||||||
|
- ".requirements.commands := ['ps']\n"
|
||||||
|
- ".engines += { type: \"native\" }"
|
||||||
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
3827
target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf
Normal file
3827
target/dependencies/vsh/vsh/biobox/main/nextflow/fastqc/main.nf
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,126 @@
|
|||||||
|
manifest {
|
||||||
|
name = 'fastqc'
|
||||||
|
mainScript = 'main.nf'
|
||||||
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
|
version = 'main'
|
||||||
|
description = 'FastQC - A high throughput sequence QC analysis tool.'
|
||||||
|
author = 'Theodoro Gasperin Terra Camargo'
|
||||||
|
}
|
||||||
|
|
||||||
|
process.container = 'nextflow/bash:latest'
|
||||||
|
|
||||||
|
// detect tempdir
|
||||||
|
tempDir = java.nio.file.Paths.get(
|
||||||
|
System.getenv('NXF_TEMP') ?:
|
||||||
|
System.getenv('VIASH_TEMP') ?:
|
||||||
|
System.getenv('TEMPDIR') ?:
|
||||||
|
System.getenv('TMPDIR') ?:
|
||||||
|
'/tmp'
|
||||||
|
).toAbsolutePath()
|
||||||
|
|
||||||
|
profiles {
|
||||||
|
no_publish {
|
||||||
|
process {
|
||||||
|
withName: '.*' {
|
||||||
|
publishDir = [
|
||||||
|
enabled: false
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mount_temp {
|
||||||
|
docker.temp = tempDir
|
||||||
|
podman.temp = tempDir
|
||||||
|
charliecloud.temp = tempDir
|
||||||
|
}
|
||||||
|
docker {
|
||||||
|
docker.enabled = true
|
||||||
|
// docker.userEmulation = true
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
singularity {
|
||||||
|
singularity.enabled = true
|
||||||
|
singularity.autoMounts = true
|
||||||
|
docker.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
podman {
|
||||||
|
podman.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
shifter {
|
||||||
|
shifter.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
charliecloud {
|
||||||
|
charliecloud.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process{
|
||||||
|
withLabel: mem1gb { memory = 1000000000.B }
|
||||||
|
withLabel: mem2gb { memory = 2000000000.B }
|
||||||
|
withLabel: mem5gb { memory = 5000000000.B }
|
||||||
|
withLabel: mem10gb { memory = 10000000000.B }
|
||||||
|
withLabel: mem20gb { memory = 20000000000.B }
|
||||||
|
withLabel: mem50gb { memory = 50000000000.B }
|
||||||
|
withLabel: mem100gb { memory = 100000000000.B }
|
||||||
|
withLabel: mem200gb { memory = 200000000000.B }
|
||||||
|
withLabel: mem500gb { memory = 500000000000.B }
|
||||||
|
withLabel: mem1tb { memory = 1000000000000.B }
|
||||||
|
withLabel: mem2tb { memory = 2000000000000.B }
|
||||||
|
withLabel: mem5tb { memory = 5000000000000.B }
|
||||||
|
withLabel: mem10tb { memory = 10000000000000.B }
|
||||||
|
withLabel: mem20tb { memory = 20000000000000.B }
|
||||||
|
withLabel: mem50tb { memory = 50000000000000.B }
|
||||||
|
withLabel: mem100tb { memory = 100000000000000.B }
|
||||||
|
withLabel: mem200tb { memory = 200000000000000.B }
|
||||||
|
withLabel: mem500tb { memory = 500000000000000.B }
|
||||||
|
withLabel: mem1gib { memory = 1073741824.B }
|
||||||
|
withLabel: mem2gib { memory = 2147483648.B }
|
||||||
|
withLabel: mem4gib { memory = 4294967296.B }
|
||||||
|
withLabel: mem8gib { memory = 8589934592.B }
|
||||||
|
withLabel: mem16gib { memory = 17179869184.B }
|
||||||
|
withLabel: mem32gib { memory = 34359738368.B }
|
||||||
|
withLabel: mem64gib { memory = 68719476736.B }
|
||||||
|
withLabel: mem128gib { memory = 137438953472.B }
|
||||||
|
withLabel: mem256gib { memory = 274877906944.B }
|
||||||
|
withLabel: mem512gib { memory = 549755813888.B }
|
||||||
|
withLabel: mem1tib { memory = 1099511627776.B }
|
||||||
|
withLabel: mem2tib { memory = 2199023255552.B }
|
||||||
|
withLabel: mem4tib { memory = 4398046511104.B }
|
||||||
|
withLabel: mem8tib { memory = 8796093022208.B }
|
||||||
|
withLabel: mem16tib { memory = 17592186044416.B }
|
||||||
|
withLabel: mem32tib { memory = 35184372088832.B }
|
||||||
|
withLabel: mem64tib { memory = 70368744177664.B }
|
||||||
|
withLabel: mem128tib { memory = 140737488355328.B }
|
||||||
|
withLabel: mem256tib { memory = 281474976710656.B }
|
||||||
|
withLabel: mem512tib { memory = 562949953421312.B }
|
||||||
|
withLabel: cpu1 { cpus = 1 }
|
||||||
|
withLabel: cpu2 { cpus = 2 }
|
||||||
|
withLabel: cpu5 { cpus = 5 }
|
||||||
|
withLabel: cpu10 { cpus = 10 }
|
||||||
|
withLabel: cpu20 { cpus = 20 }
|
||||||
|
withLabel: cpu50 { cpus = 50 }
|
||||||
|
withLabel: cpu100 { cpus = 100 }
|
||||||
|
withLabel: cpu200 { cpus = 200 }
|
||||||
|
withLabel: cpu500 { cpus = 500 }
|
||||||
|
withLabel: cpu1000 { cpus = 1000 }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,257 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
|
"title": "fastqc",
|
||||||
|
"description": "FastQC - A high throughput sequence QC analysis tool.",
|
||||||
|
"type": "object",
|
||||||
|
"definitions": {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"inputs" : {
|
||||||
|
"title": "Inputs",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"input": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed",
|
||||||
|
"help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"outputs" : {
|
||||||
|
"title": "Outputs",
|
||||||
|
"type": "object",
|
||||||
|
"description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"html": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results",
|
||||||
|
"help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output html file named sample_1.html\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.html_*.html"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"zip": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc",
|
||||||
|
"help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.zip_*.zip"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"summary": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)",
|
||||||
|
"help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create an output summary.txt file named sample_1_summary.txt\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.summary_*.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"data": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)",
|
||||||
|
"help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an output data.txt file named sample_1_data.txt\n"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.data_*.txt"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"options" : {
|
||||||
|
"title": "Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"casava": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Files come from raw casava output",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"nano": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"nofilter": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"nogroup": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"min_length": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report",
|
||||||
|
"help_text": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"format": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format",
|
||||||
|
"help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"contaminants": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against",
|
||||||
|
"help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"adapters": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library",
|
||||||
|
"help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"limits": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules",
|
||||||
|
"help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"kmers": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module",
|
||||||
|
"help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"quiet": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"nextflow input-output arguments" : {
|
||||||
|
"title": "Nextflow input-output arguments",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"publish_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||||
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"param_list": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||||
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||||
|
"hidden": true
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/inputs"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/outputs"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1,22 +1,23 @@
|
|||||||
name: "umitools_dedup"
|
name: "fq_subsample"
|
||||||
namespace: "umitools"
|
|
||||||
version: "main"
|
version: "main"
|
||||||
argument_groups:
|
argument_groups:
|
||||||
- name: "Input"
|
- name: "Input"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "boolean"
|
- type: "file"
|
||||||
name: "--paired"
|
name: "--input_1"
|
||||||
description: "Paired fastq files or not?"
|
description: "First input fastq file to subsample. Accepts both raw and gzipped\
|
||||||
|
\ FASTQ inputs."
|
||||||
info: null
|
info: null
|
||||||
default:
|
must_exist: true
|
||||||
- false
|
create_parent: true
|
||||||
required: false
|
required: true
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--bam"
|
name: "--input_2"
|
||||||
description: "Input BAM file"
|
description: "Second input fastq files to subsample. Accepts both raw and gzipped\
|
||||||
|
\ FASTQ inputs."
|
||||||
info: null
|
info: null
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
@@ -24,32 +25,12 @@ argument_groups:
|
|||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
|
||||||
name: "--bai"
|
|
||||||
description: "BAM index"
|
|
||||||
info: null
|
|
||||||
must_exist: true
|
|
||||||
create_parent: true
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "boolean"
|
|
||||||
name: "--get_output_stats"
|
|
||||||
description: "Whether or not to generate output stats."
|
|
||||||
info: null
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--output_bam"
|
name: "--output_1"
|
||||||
description: "Deduplicated BAM file"
|
description: "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.$key.bam"
|
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
@@ -57,51 +38,68 @@ argument_groups:
|
|||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--output_stats"
|
name: "--output_2"
|
||||||
description: "Directory containing UMI based dedupllication statistics files"
|
description: "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.umi_dedup.stats"
|
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
direction: "output"
|
direction: "output"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
|
- name: "Options"
|
||||||
|
arguments:
|
||||||
|
- type: "double"
|
||||||
|
name: "--probability"
|
||||||
|
description: "The probability a record is kept, as a percentage (0.0, 1.0). Cannot\
|
||||||
|
\ be used with `record-count`"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--record_count"
|
||||||
|
description: "The exact number of records to keep. Cannot be used with `probability`"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--seed"
|
||||||
|
description: "Seed to use for the random number generator"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
resources:
|
resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "script.sh"
|
path: "script.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
description: "Deduplicate reads based on the mapping co-ordinate and the UMI attached\
|
description: "fq subsample outputs a subset of records from single or paired FASTQ\
|
||||||
\ to the read.\n"
|
\ files."
|
||||||
test_resources:
|
test_resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "test.sh"
|
path: "test.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
- type: "file"
|
- type: "file"
|
||||||
path: "chr19.bam"
|
path: "test_data"
|
||||||
- type: "file"
|
info: null
|
||||||
path: "chr19.bam.bai"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
|
||||||
paths:
|
|
||||||
- "modules/nf-core/umitools/dedup/main.nf"
|
|
||||||
- "modules/nf-core/umitools/dedup/meta.yml"
|
|
||||||
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
|
||||||
status: "enabled"
|
status: "enabled"
|
||||||
requirements:
|
requirements:
|
||||||
commands:
|
commands:
|
||||||
- "ps"
|
- "ps"
|
||||||
repositories:
|
keywords:
|
||||||
- type: "vsh"
|
- "fastq"
|
||||||
name: "biobox"
|
- "subsample"
|
||||||
repo: "vsh/biobox"
|
- "subset"
|
||||||
tag: "main"
|
license: "MIT"
|
||||||
- type: "vsh"
|
links:
|
||||||
name: "craftbox"
|
repository: "https://github.com/stjude-rust-labs/fq"
|
||||||
repo: "craftbox"
|
homepage: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
|
||||||
tag: "v0.1.0"
|
documentation: "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
|
||||||
runners:
|
runners:
|
||||||
- type: "executable"
|
- type: "executable"
|
||||||
id: "executable"
|
id: "executable"
|
||||||
@@ -170,56 +168,50 @@ runners:
|
|||||||
engines:
|
engines:
|
||||||
- type: "docker"
|
- type: "docker"
|
||||||
id: "docker"
|
id: "docker"
|
||||||
image: "ubuntu:22.04"
|
image: "rust:1.81-slim"
|
||||||
target_registry: "images.viash-hub.com"
|
target_registry: "images.viash-hub.com"
|
||||||
target_tag: "main"
|
target_tag: "main"
|
||||||
namespace_separator: "/"
|
namespace_separator: "/"
|
||||||
setup:
|
setup:
|
||||||
- type: "apt"
|
- type: "docker"
|
||||||
packages:
|
run:
|
||||||
- "pip"
|
- "apt-get update && apt-get install -y git procps && \\\ngit clone --depth 1\
|
||||||
interactive: false
|
\ --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\ncd fq &&\
|
||||||
- type: "python"
|
\ \\\ncargo install --locked --path . && \\\nmv target/release/fq /usr/local/bin/\
|
||||||
user: false
|
\ && \\\ncd / && rm -rf /fq\n"
|
||||||
packages:
|
|
||||||
- "umi_tools"
|
|
||||||
upgrade: true
|
|
||||||
entrypoint: []
|
entrypoint: []
|
||||||
cmd: null
|
cmd: null
|
||||||
- type: "native"
|
- type: "native"
|
||||||
id: "native"
|
id: "native"
|
||||||
build_info:
|
build_info:
|
||||||
config: "src/umitools/umitools_dedup/config.vsh.yaml"
|
config: "src/fq_subsample/config.vsh.yaml"
|
||||||
runner: "nextflow"
|
runner: "nextflow"
|
||||||
engine: "docker|native"
|
engine: "docker|native"
|
||||||
output: "target/nextflow/umitools/umitools_dedup"
|
output: "target/nextflow/fq_subsample"
|
||||||
executable: "target/nextflow/umitools/umitools_dedup/main.nf"
|
executable: "target/nextflow/fq_subsample/main.nf"
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
package_config:
|
package_config:
|
||||||
name: "rnaseq"
|
name: "biobox"
|
||||||
version: "main"
|
version: "main"
|
||||||
info:
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
test_resources:
|
info: null
|
||||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
|
||||||
dest: "testData"
|
|
||||||
repositories:
|
|
||||||
- type: "vsh"
|
|
||||||
name: "biobox"
|
|
||||||
repo: "vsh/biobox"
|
|
||||||
tag: "main"
|
|
||||||
- type: "vsh"
|
|
||||||
name: "craftbox"
|
|
||||||
repo: "craftbox"
|
|
||||||
tag: "v0.1.0"
|
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
source: "src"
|
source: "src"
|
||||||
target: "target"
|
target: "target"
|
||||||
config_mods:
|
config_mods:
|
||||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
- ".requirements.commands := ['ps']\n"
|
||||||
\ := '$id'\n"
|
|
||||||
- ".engines += { type: \"native\" }"
|
- ".engines += { type: \"native\" }"
|
||||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
organization: "vsh"
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -2812,22 +2812,21 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--input",
|
"name" : "--input_1",
|
||||||
"description" : "Input fastq files to subsample",
|
"description" : "First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs.",
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : true,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "string",
|
"type" : "file",
|
||||||
"name" : "--extra_args",
|
"name" : "--input_2",
|
||||||
"description" : "Extra arguments to pass to fq subsample",
|
"description" : "Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs.",
|
||||||
"default" : [
|
"must_exist" : true,
|
||||||
""
|
"create_parent" : true,
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
@@ -2836,15 +2835,12 @@ meta = [
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name" : "Input",
|
"name" : "Output",
|
||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--output_1",
|
"name" : "--output_1",
|
||||||
"description" : "Sampled read 1 fastq files",
|
"description" : "Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`.",
|
||||||
"default" : [
|
|
||||||
"$id.read_1.subsampled.fastq"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
@@ -2855,11 +2851,8 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--output_2",
|
"name" : "--output_2",
|
||||||
"description" : "Sampled read 2 fastq files",
|
"description" : "Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`.",
|
||||||
"default" : [
|
"must_exist" : true,
|
||||||
"$id.read_2.subsampled.fastq"
|
|
||||||
],
|
|
||||||
"must_exist" : false,
|
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
@@ -2867,6 +2860,38 @@ meta = [
|
|||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name" : "Options",
|
||||||
|
"arguments" : [
|
||||||
|
{
|
||||||
|
"type" : "double",
|
||||||
|
"name" : "--probability",
|
||||||
|
"description" : "The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--record_count",
|
||||||
|
"description" : "The exact number of records to keep. Cannot be used with `probability`",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--seed",
|
||||||
|
"description" : "Seed to use for the random number generator",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"resources" : [
|
"resources" : [
|
||||||
@@ -2876,7 +2901,7 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n",
|
"description" : "fq subsample outputs a subset of records from single or paired FASTQ files.",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
@@ -2885,43 +2910,26 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
|
"path" : "test_data"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/fq/subsample/main.nf",
|
|
||||||
"modules/nf-core/fq/subsample/meta.yml"
|
|
||||||
],
|
|
||||||
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"keywords" : [
|
||||||
{
|
"fastq",
|
||||||
"type" : "vsh",
|
"subsample",
|
||||||
"name" : "biobox",
|
"subset"
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/stjude-rust-labs/fq",
|
||||||
|
"homepage" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md",
|
||||||
|
"documentation" : "https://github.com/stjude-rust-labs/fq/blob/master/README.md"
|
||||||
|
},
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3000,7 +3008,7 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"id" : "docker",
|
"id" : "docker",
|
||||||
"image" : "ubuntu:22.04",
|
"image" : "rust:1.81-slim",
|
||||||
"target_registry" : "images.viash-hub.com",
|
"target_registry" : "images.viash-hub.com",
|
||||||
"target_tag" : "main",
|
"target_tag" : "main",
|
||||||
"namespace_separator" : "/",
|
"namespace_separator" : "/",
|
||||||
@@ -3008,10 +3016,7 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"run" : [
|
"run" : [
|
||||||
"ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \\\\\napt-get update && \\\\\napt-get install -y --no-install-recommends build-essential git-all curl && \\\\\ncurl https://sh.rustup.rs -sSf | sh -s -- -y && \\\\\n. \\"$HOME/.cargo/env\\" && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\nmv fq /usr/local/ && cd /usr/local/fq && \\\\\ncargo install --locked --path . && \\\\\nmv /usr/local/fq/target/release/fq /usr/local/bin/\n"
|
"apt-get update && apt-get install -y git procps && \\\\\ngit clone --depth 1 --branch v0.12.0 https://github.com/stjude-rust-labs/fq.git && \\\\\ncd fq && \\\\\ncargo install --locked --path . && \\\\\nmv target/release/fq /usr/local/bin/ && \\\\\ncd / && rm -rf /fq\n"
|
||||||
],
|
|
||||||
"env" : [
|
|
||||||
"TZ=Europe/Brussels"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -3025,46 +3030,36 @@ meta = [
|
|||||||
"config" : "/workdir/root/repo/src/fq_subsample/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/fq_subsample/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/fq_subsample",
|
"output" : "target/nextflow/fq_subsample",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3078,12 +3073,17 @@ def innerWorkflowFactory(args) {
|
|||||||
def rawScript = '''set -e
|
def rawScript = '''set -e
|
||||||
tempscript=".viash_script.sh"
|
tempscript=".viash_script.sh"
|
||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT_1+x} ]; then echo "${VIASH_PAR_INPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_1='&'#" ; else echo "# par_input_1="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_EXTRA_ARGS+x} ]; then echo "${VIASH_PAR_EXTRA_ARGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_extra_args='&'#" ; else echo "# par_extra_args="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT_2+x} ]; then echo "${VIASH_PAR_INPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_2='&'#" ; else echo "# par_input_2="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi )
|
$( if [ ! -z ${VIASH_PAR_OUTPUT_1+x} ]; then echo "${VIASH_PAR_OUTPUT_1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_1='&'#" ; else echo "# par_output_1="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi )
|
$( if [ ! -z ${VIASH_PAR_OUTPUT_2+x} ]; then echo "${VIASH_PAR_OUTPUT_2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_2='&'#" ; else echo "# par_output_2="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_PROBABILITY+x} ]; then echo "${VIASH_PAR_PROBABILITY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_probability='&'#" ; else echo "# par_probability="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_RECORD_COUNT+x} ]; then echo "${VIASH_PAR_RECORD_COUNT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_record_count='&'#" ; else echo "# par_record_count="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
||||||
@@ -3104,29 +3104,27 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
|
|||||||
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
||||||
|
|
||||||
## VIASH END
|
## VIASH END
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
IFS=";" read -ra input <<< \\$par_input
|
|
||||||
n_fastq=\\${#input[@]}
|
|
||||||
|
|
||||||
required_args=("-p" "--probability" "-n" "--read-count")
|
required_args=("-p" "--probability" "-n" "--record_count")
|
||||||
for arg in "\\${required_args[@]}"; do
|
|
||||||
if [[ "\\$par_extra_args" == *"\\$arg"* ]]; then
|
|
||||||
echo "FQ/SUBSAMPLE requires either --probability (-p) or --record-count (-n) to be specified with --extra_args"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ \\$n_fastq -eq 1 ]; then
|
# exclusive OR for required arguments \\$par_probability and \\$par_record_count
|
||||||
fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1
|
if [[ -n \\$par_probability && -n \\$par_record_count ]] || [[ -z \\$par_probability && -z \\$par_record_count ]]; then
|
||||||
elif [ \\$n_fastq -eq 2 ]; then
|
echo "FQ/SUBSAMPLE requires either --probability or --record_count to be specified"
|
||||||
fq subsample \\$par_extra_args \\${input[*]} --r1-dst \\$par_output_1 --r2-dst \\$par_output_2
|
|
||||||
else
|
|
||||||
echo "FQ/SUBSAMPLE only accepts 1 or 2 FASTQ files!"
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
fq subsample \\\\
|
||||||
|
\\${par_output_1:+--r1-dst "\\${par_output_1}"} \\\\
|
||||||
|
\\${par_output_2:+--r2-dst "\\${par_output_2}"} \\\\
|
||||||
|
\\${par_probability:+--probability "\\${par_probability}"} \\\\
|
||||||
|
\\${par_record_count:+--record-count "\\${par_record_count}"} \\\\
|
||||||
|
\\${par_seed:+--seed "\\${par_seed}"} \\\\
|
||||||
|
\\${par_input_1} \\\\
|
||||||
|
\\${par_input_2}
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3487,7 +3485,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/fq_subsample",
|
"image" : "vsh/biobox/fq_subsample",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -3,7 +3,7 @@ manifest {
|
|||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
nextflowVersion = '!>=20.12.1-edge'
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
version = 'main'
|
version = 'main'
|
||||||
description = 'fq subsample outputs a subset of records from single or paired FASTQ files. This requires a seed (--seed) to be set in ext.args\n'
|
description = 'fq subsample outputs a subset of records from single or paired FASTQ files.'
|
||||||
}
|
}
|
||||||
|
|
||||||
process.container = 'nextflow/bash:latest'
|
process.container = 'nextflow/bash:latest'
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema",
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
"title": "umitools_dedup",
|
"title": "fq_subsample",
|
||||||
"description": "Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.\n",
|
"description": "fq subsample outputs a subset of records from single or paired FASTQ files.",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"definitions": {
|
"definitions": {
|
||||||
|
|
||||||
@@ -14,42 +14,21 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"paired": {
|
"input_1": {
|
||||||
"type":
|
|
||||||
"boolean",
|
|
||||||
"description": "Type: `boolean`, default: `false`. Paired fastq files or not?",
|
|
||||||
"help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?"
|
|
||||||
,
|
|
||||||
"default":false
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"bam": {
|
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`. Input BAM file",
|
"description": "Type: `file`, required. First input fastq file to subsample",
|
||||||
"help_text": "Type: `file`. Input BAM file"
|
"help_text": "Type: `file`, required. First input fastq file to subsample. Accepts both raw and gzipped FASTQ inputs."
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"bai": {
|
"input_2": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`. BAM index",
|
"description": "Type: `file`. Second input fastq files to subsample",
|
||||||
"help_text": "Type: `file`. BAM index"
|
"help_text": "Type: `file`. Second input fastq files to subsample. Accepts both raw and gzipped FASTQ inputs."
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"get_output_stats": {
|
|
||||||
"type":
|
|
||||||
"boolean",
|
|
||||||
"description": "Type: `boolean`. Whether or not to generate output stats",
|
|
||||||
"help_text": "Type: `boolean`. Whether or not to generate output stats."
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,24 +44,64 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"output_bam": {
|
"output_1": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file",
|
"description": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.output_bam.bam`. Deduplicated BAM file"
|
"help_text": "Type: `file`, default: `$id.$key.output_1.output_1`. Sampled read 1 fastq files. Output will be gzipped if ends in `.gz`."
|
||||||
,
|
,
|
||||||
"default":"$id.$key.output_bam.bam"
|
"default": "$id.$key.output_1.output_1"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"output_stats": {
|
"output_2": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files",
|
"description": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.output_stats.stats`. Directory containing UMI based dedupllication statistics files"
|
"help_text": "Type: `file`, default: `$id.$key.output_2.output_2`. Sampled read 2 fastq files. Output will be gzipped if ends in `.gz`."
|
||||||
,
|
,
|
||||||
"default":"$id.$key.output_stats.stats"
|
"default": "$id.$key.output_2.output_2"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"options" : {
|
||||||
|
"title": "Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"probability": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`. The probability a record is kept, as a percentage (0",
|
||||||
|
"help_text": "Type: `double`. The probability a record is kept, as a percentage (0.0, 1.0). Cannot be used with `record-count`"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"record_count": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. The exact number of records to keep",
|
||||||
|
"help_text": "Type: `integer`. The exact number of records to keep. Cannot be used with `probability`"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"seed": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. Seed to use for the random number generator",
|
||||||
|
"help_text": "Type: `integer`. Seed to use for the random number generator"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -130,6 +149,10 @@
|
|||||||
"$ref": "#/definitions/output"
|
"$ref": "#/definitions/output"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/options"
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/nextflow input-output arguments"
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
}
|
}
|
||||||
@@ -5,18 +5,22 @@ argument_groups:
|
|||||||
- name: "Input"
|
- name: "Input"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--transcriptome_fasta"
|
name: "--input"
|
||||||
|
description: "Path to a FASTA-file containing the transcriptome sequences, either\
|
||||||
|
\ in plain text or \ncompressed (.gz) format.\n"
|
||||||
info: null
|
info: null
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: true
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "integer"
|
- type: "file"
|
||||||
name: "--pseudo_aligner_kmer_size"
|
name: "--d_list"
|
||||||
description: "Kmer length passed to indexing step of pseudoaligners."
|
description: "Path to a FASTA-file containing sequences to mask from quantification.\n"
|
||||||
info: null
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
@@ -24,9 +28,9 @@ argument_groups:
|
|||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--kallisto_index"
|
name: "--index"
|
||||||
info: null
|
info: null
|
||||||
default:
|
example:
|
||||||
- "Kallisto_index"
|
- "Kallisto_index"
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
@@ -34,37 +38,96 @@ argument_groups:
|
|||||||
direction: "output"
|
direction: "output"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
|
- name: "Options"
|
||||||
|
arguments:
|
||||||
|
- type: "integer"
|
||||||
|
name: "--kmer_size"
|
||||||
|
description: "Kmer length passed to indexing step of pseudoaligners (default:\
|
||||||
|
\ '31').\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 31
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--make_unique"
|
||||||
|
description: "Replace repeated target names with unique names.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--aa"
|
||||||
|
description: "Generate index from a FASTA-file containing amino acid sequences.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--distiguish"
|
||||||
|
description: "Generate index where sequences are distinguished by the sequence\
|
||||||
|
\ names.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--min_size"
|
||||||
|
alternatives:
|
||||||
|
- "-m"
|
||||||
|
description: "Length of minimizers (default: automatically chosen).\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--ec_max_size"
|
||||||
|
alternatives:
|
||||||
|
- "-e"
|
||||||
|
description: "Maximum number of targets in an equivalence class (default: no maximum).\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--tmp"
|
||||||
|
alternatives:
|
||||||
|
- "-T"
|
||||||
|
description: "Path to a directory for temporary files.\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "tmp"
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
resources:
|
resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "script.sh"
|
path: "script.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
description: "Create Kallisto index.\n"
|
description: "Build a Kallisto index for the transcriptome to use Kallisto in the\
|
||||||
|
\ mapping-based mode.\n"
|
||||||
test_resources:
|
test_resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "test.sh"
|
path: "test.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
- type: "file"
|
- type: "file"
|
||||||
path: "transcriptome.fasta"
|
path: "test_data"
|
||||||
info:
|
info: null
|
||||||
migration_info:
|
|
||||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
|
||||||
paths:
|
|
||||||
- "modules/nf-core/kallisto/index/main.nf"
|
|
||||||
- "modules/nf-core/kallisto/index/meta.yml"
|
|
||||||
last_sha: "c0816976384d5e7ee6079c29c45958df1ffa0ee4"
|
|
||||||
status: "enabled"
|
status: "enabled"
|
||||||
requirements:
|
requirements:
|
||||||
commands:
|
commands:
|
||||||
- "ps"
|
- "ps"
|
||||||
repositories:
|
keywords:
|
||||||
- type: "vsh"
|
- "kallisto"
|
||||||
name: "biobox"
|
- "index"
|
||||||
repo: "vsh/biobox"
|
license: "BSD 2-Clause License"
|
||||||
tag: "main"
|
references:
|
||||||
- type: "vsh"
|
doi:
|
||||||
name: "craftbox"
|
- "https://doi.org/10.1038/nbt.3519"
|
||||||
repo: "craftbox"
|
links:
|
||||||
tag: "v0.1.0"
|
repository: "https://github.com/pachterlab/kallisto"
|
||||||
|
homepage: "https://pachterlab.github.io/kallisto/about"
|
||||||
|
documentation: "https://pachterlab.github.io/kallisto/manual"
|
||||||
|
issue_tracker: "https://github.com/pachterlab/kallisto/issues"
|
||||||
runners:
|
runners:
|
||||||
- type: "executable"
|
- type: "executable"
|
||||||
id: "executable"
|
id: "executable"
|
||||||
@@ -155,31 +218,28 @@ build_info:
|
|||||||
output: "target/nextflow/kallisto/kallisto_index"
|
output: "target/nextflow/kallisto/kallisto_index"
|
||||||
executable: "target/nextflow/kallisto/kallisto_index/main.nf"
|
executable: "target/nextflow/kallisto/kallisto_index/main.nf"
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
package_config:
|
package_config:
|
||||||
name: "rnaseq"
|
name: "biobox"
|
||||||
version: "main"
|
version: "main"
|
||||||
info:
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
test_resources:
|
info: null
|
||||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
|
||||||
dest: "testData"
|
|
||||||
repositories:
|
|
||||||
- type: "vsh"
|
|
||||||
name: "biobox"
|
|
||||||
repo: "vsh/biobox"
|
|
||||||
tag: "main"
|
|
||||||
- type: "vsh"
|
|
||||||
name: "craftbox"
|
|
||||||
repo: "craftbox"
|
|
||||||
tag: "v0.1.0"
|
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
source: "src"
|
source: "src"
|
||||||
target: "target"
|
target: "target"
|
||||||
config_mods:
|
config_mods:
|
||||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
- ".requirements.commands := ['ps']\n"
|
||||||
\ := '$id'\n"
|
|
||||||
- ".engines += { type: \"native\" }"
|
- ".engines += { type: \"native\" }"
|
||||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
organization: "vsh"
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -2813,18 +2813,21 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--transcriptome_fasta",
|
"name" : "--input",
|
||||||
|
"description" : "Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n",
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "integer",
|
"type" : "file",
|
||||||
"name" : "--pseudo_aligner_kmer_size",
|
"name" : "--d_list",
|
||||||
"description" : "Kmer length passed to indexing step of pseudoaligners.",
|
"description" : "Path to a FASTA-file containing sequences to mask from quantification.\n",
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
@@ -2837,8 +2840,8 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--kallisto_index",
|
"name" : "--index",
|
||||||
"default" : [
|
"example" : [
|
||||||
"Kallisto_index"
|
"Kallisto_index"
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
@@ -2849,6 +2852,80 @@ meta = [
|
|||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name" : "Options",
|
||||||
|
"arguments" : [
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--kmer_size",
|
||||||
|
"description" : "Kmer length passed to indexing step of pseudoaligners (default: '31').\n",
|
||||||
|
"example" : [
|
||||||
|
31
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--make_unique",
|
||||||
|
"description" : "Replace repeated target names with unique names.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--aa",
|
||||||
|
"description" : "Generate index from a FASTA-file containing amino acid sequences.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--distiguish",
|
||||||
|
"description" : "Generate index where sequences are distinguished by the sequence names.\n",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--min_size",
|
||||||
|
"alternatives" : [
|
||||||
|
"-m"
|
||||||
|
],
|
||||||
|
"description" : "Length of minimizers (default: automatically chosen).\n",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--ec_max_size",
|
||||||
|
"alternatives" : [
|
||||||
|
"-e"
|
||||||
|
],
|
||||||
|
"description" : "Maximum number of targets in an equivalence class (default: no maximum).\n",
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "string",
|
||||||
|
"name" : "--tmp",
|
||||||
|
"alternatives" : [
|
||||||
|
"-T"
|
||||||
|
],
|
||||||
|
"description" : "Path to a directory for temporary files.\n",
|
||||||
|
"example" : [
|
||||||
|
"tmp"
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"resources" : [
|
"resources" : [
|
||||||
@@ -2858,7 +2935,7 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "Create Kallisto index.\n",
|
"description" : "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
@@ -2867,39 +2944,31 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"path" : "/testData/minimal_test/reference/transcriptome.fasta"
|
"path" : "test_data"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/kallisto/index/main.nf",
|
|
||||||
"modules/nf-core/kallisto/index/meta.yml"
|
|
||||||
],
|
|
||||||
"last_sha" : "c0816976384d5e7ee6079c29c45958df1ffa0ee4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"keywords" : [
|
||||||
{
|
"kallisto",
|
||||||
"type" : "vsh",
|
"index"
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
|
"license" : "BSD 2-Clause License",
|
||||||
|
"references" : {
|
||||||
|
"doi" : [
|
||||||
|
"https://doi.org/10.1038/nbt.3519"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/pachterlab/kallisto",
|
||||||
|
"homepage" : "https://pachterlab.github.io/kallisto/about",
|
||||||
|
"documentation" : "https://pachterlab.github.io/kallisto/manual",
|
||||||
|
"issue_tracker" : "https://github.com/pachterlab/kallisto/issues"
|
||||||
|
},
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3000,46 +3069,36 @@ meta = [
|
|||||||
"config" : "/workdir/root/repo/src/kallisto/kallisto_index/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/kallisto/kallisto_index/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_index",
|
"output" : "target/nextflow/kallisto/kallisto_index",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3053,11 +3112,20 @@ def innerWorkflowFactory(args) {
|
|||||||
def rawScript = '''set -e
|
def rawScript = '''set -e
|
||||||
tempscript=".viash_script.sh"
|
tempscript=".viash_script.sh"
|
||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
$( if [ ! -z ${VIASH_PAR_TRANSCRIPTOME_FASTA+x} ]; then echo "${VIASH_PAR_TRANSCRIPTOME_FASTA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_transcriptome_fasta='&'#" ; else echo "# par_transcriptome_fasta="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE+x} ]; then echo "${VIASH_PAR_PSEUDO_ALIGNER_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pseudo_aligner_kmer_size='&'#" ; else echo "# par_pseudo_aligner_kmer_size="; fi )
|
$( if [ ! -z ${VIASH_PAR_D_LIST+x} ]; then echo "${VIASH_PAR_D_LIST}" | sed "s#'#'\\"'\\"'#g;s#.*#par_d_list='&'#" ; else echo "# par_d_list="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_KALLISTO_INDEX+x} ]; then echo "${VIASH_PAR_KALLISTO_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kallisto_index='&'#" ; else echo "# par_kallisto_index="; fi )
|
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_KMER_SIZE+x} ]; then echo "${VIASH_PAR_KMER_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmer_size='&'#" ; else echo "# par_kmer_size="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MAKE_UNIQUE+x} ]; then echo "${VIASH_PAR_MAKE_UNIQUE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_make_unique='&'#" ; else echo "# par_make_unique="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_AA+x} ]; then echo "${VIASH_PAR_AA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_aa='&'#" ; else echo "# par_aa="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_DISTIGUISH+x} ]; then echo "${VIASH_PAR_DISTIGUISH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_distiguish='&'#" ; else echo "# par_distiguish="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_MIN_SIZE+x} ]; then echo "${VIASH_PAR_MIN_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_size='&'#" ; else echo "# par_min_size="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_EC_MAX_SIZE+x} ]; then echo "${VIASH_PAR_EC_MAX_SIZE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_ec_max_size='&'#" ; else echo "# par_ec_max_size="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_TMP+x} ]; then echo "${VIASH_PAR_TMP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tmp='&'#" ; else echo "# par_tmp="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
||||||
@@ -3078,14 +3146,35 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
|
|||||||
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
||||||
|
|
||||||
## VIASH END
|
## VIASH END
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
|
unset_if_false=( par_make_unique par_aa par_distinguish )
|
||||||
|
|
||||||
|
for var in "\\${unset_if_false[@]}"; do
|
||||||
|
temp_var="\\${!var}"
|
||||||
|
[[ "\\$temp_var" == "false" ]] && unset \\$var
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -n "\\$par_kmer_size" ]; then
|
||||||
|
if [[ "\\$par_kmer_size" -lt 1 || "\\$par_kmer_size" -gt 31 || \\$(( par_kmer_size % 2 )) -eq 0 ]]; then
|
||||||
|
echo "Error: Kmer size must be an odd number between 1 and 31."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
kallisto index \\\\
|
kallisto index \\\\
|
||||||
\\${par_pseudo_aligner_kmer_size:+-k \\$par_pseudo_aligner_kmer_size} \\\\
|
-i "\\${par_index}" \\\\
|
||||||
-i \\$par_kallisto_index \\\\
|
\\${par_kmer_size:+--kmer-size "\\${par_kmer_size}"} \\\\
|
||||||
\\$par_transcriptome_fasta
|
\\${par_make_unique:+--make-unique} \\\\
|
||||||
|
\\${par_aa:+--aa} \\\\
|
||||||
|
\\${par_distinguish:+--distinguish} \\\\
|
||||||
|
\\${par_min_size:+--min-size "\\${par_min_size}"} \\\\
|
||||||
|
\\${par_ec_max_size:+--ec-max-size "\\${par_ec_max_size}"} \\\\
|
||||||
|
\\${par_d_list:+--d-list "\\${par_d_list}"} \\\\
|
||||||
|
\\${meta_cpus:+--threads "\\${meta_cpus}"} \\\\
|
||||||
|
\\${par_tmp:+--tmp "\\${par_tmp}"} \\\\
|
||||||
|
"\\${par_input}"
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3446,7 +3535,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/kallisto/kallisto_index",
|
"image" : "vsh/biobox/kallisto/kallisto_index",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -3,7 +3,7 @@ manifest {
|
|||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
nextflowVersion = '!>=20.12.1-edge'
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
version = 'main'
|
version = 'main'
|
||||||
description = 'Create Kallisto index.\n'
|
description = 'Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
process.container = 'nextflow/bash:latest'
|
process.container = 'nextflow/bash:latest'
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema",
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
"title": "bbmap_bbsplit",
|
"title": "kallisto_index",
|
||||||
"description": "Split sequencing reads by mapping them to multiple references simultaneously.\n",
|
"description": "Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.\n",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"definitions": {
|
"definitions": {
|
||||||
|
|
||||||
@@ -14,72 +14,21 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"id": {
|
|
||||||
"type":
|
|
||||||
"string",
|
|
||||||
"description": "Type: `string`. Sample ID",
|
|
||||||
"help_text": "Type: `string`. Sample ID"
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"paired": {
|
|
||||||
"type":
|
|
||||||
"boolean",
|
|
||||||
"description": "Type: `boolean`, default: `false`. Paired fastq files or not?",
|
|
||||||
"help_text": "Type: `boolean`, default: `false`. Paired fastq files or not?"
|
|
||||||
,
|
|
||||||
"default":false
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"input": {
|
"input": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)",
|
"description": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (",
|
||||||
"help_text": "Type: List of `file`, example: `sample.fastq`, multiple_sep: `\",\"`. Input fastq files, either one or two (paired)"
|
"help_text": "Type: `file`, required. Path to a FASTA-file containing the transcriptome sequences, either in plain text or \ncompressed (.gz) format.\n"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"primary_ref": {
|
"d_list": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`. Primary reference FASTA",
|
"description": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification",
|
||||||
"help_text": "Type: `file`. Primary reference FASTA"
|
"help_text": "Type: `file`. Path to a FASTA-file containing sequences to mask from quantification.\n"
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"bbsplit_fasta_list": {
|
|
||||||
"type":
|
|
||||||
"string",
|
|
||||||
"description": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit",
|
|
||||||
"help_text": "Type: `file`. Path to comma-separated file containing a list of reference genomes to filter reads against with BBSplit."
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"only_build_index": {
|
|
||||||
"type":
|
|
||||||
"boolean",
|
|
||||||
"description": "Type: `boolean`. true = only build index; false = mapping",
|
|
||||||
"help_text": "Type: `boolean`. true = only build index; false = mapping"
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
,
|
|
||||||
"built_bbsplit_index": {
|
|
||||||
"type":
|
|
||||||
"string",
|
|
||||||
"description": "Type: `file`. Directory with index files",
|
|
||||||
"help_text": "Type: `file`. Directory with index files"
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,35 +44,96 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"fastq_1": {
|
"index": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1",
|
"description": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. ",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.fastq_1.fastq`. Output file for read 1."
|
"help_text": "Type: `file`, default: `$id.$key.index.index`, example: `Kallisto_index`. "
|
||||||
,
|
,
|
||||||
"default":"$id.$key.fastq_1.fastq"
|
"default": "$id.$key.index.index"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"options" : {
|
||||||
|
"title": "Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"kmer_size": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027)",
|
||||||
|
"help_text": "Type: `integer`, example: `31`. Kmer length passed to indexing step of pseudoaligners (default: \u002731\u0027).\n"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"fastq_2": {
|
"make_unique": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"boolean",
|
||||||
"description": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2",
|
"description": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.fastq_2.fastq`. Output file for read 2."
|
"help_text": "Type: `boolean_true`, default: `false`. Replace repeated target names with unique names.\n"
|
||||||
,
|
,
|
||||||
"default":"$id.$key.fastq_2.fastq"
|
"default": "False"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"bbsplit_index": {
|
"aa": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Generate index from a FASTA-file containing amino acid sequences.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"distiguish": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Generate index where sequences are distinguished by the sequence names.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"min_size": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. Length of minimizers (default: automatically chosen)",
|
||||||
|
"help_text": "Type: `integer`. Length of minimizers (default: automatically chosen).\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ec_max_size": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum)",
|
||||||
|
"help_text": "Type: `integer`. Maximum number of targets in an equivalence class (default: no maximum).\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"tmp": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files",
|
"description": "Type: `string`, example: `tmp`. Path to a directory for temporary files",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.bbsplit_index.bbsplit_index`. Directory with index files"
|
"help_text": "Type: `string`, example: `tmp`. Path to a directory for temporary files.\n"
|
||||||
,
|
|
||||||
"default":"$id.$key.bbsplit_index.bbsplit_index"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -171,6 +181,10 @@
|
|||||||
"$ref": "#/definitions/output"
|
"$ref": "#/definitions/output"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/options"
|
||||||
|
},
|
||||||
|
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/nextflow input-output arguments"
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
}
|
}
|
||||||
@@ -11,84 +11,33 @@ argument_groups:
|
|||||||
info: null
|
info: null
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: true
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: true
|
multiple: true
|
||||||
multiple_sep: ","
|
|
||||||
- type: "boolean"
|
|
||||||
name: "--paired"
|
|
||||||
description: "Paired reads or not."
|
|
||||||
info: null
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "string"
|
|
||||||
name: "--strandedness"
|
|
||||||
description: "Sample strand-specificity."
|
|
||||||
info: null
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--index"
|
name: "--index"
|
||||||
|
alternatives:
|
||||||
|
- "-i"
|
||||||
description: "Kallisto genome index."
|
description: "Kallisto genome index."
|
||||||
info: null
|
info: null
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: true
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "file"
|
|
||||||
name: "--gtf"
|
|
||||||
description: "Optional gtf file for translation of transcripts into genomic coordinates."
|
|
||||||
info: null
|
|
||||||
must_exist: true
|
|
||||||
create_parent: true
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "file"
|
|
||||||
name: "--chromosomes"
|
|
||||||
description: "Optional tab separated file with chromosome names and lengths."
|
|
||||||
info: null
|
|
||||||
must_exist: true
|
|
||||||
create_parent: true
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "integer"
|
|
||||||
name: "--fragment_length"
|
|
||||||
description: "For single-end mode only, the estimated average fragment length."
|
|
||||||
info: null
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- type: "integer"
|
|
||||||
name: "--fragment_length_sd"
|
|
||||||
description: "For single-end mode only, the estimated standard deviation of the\
|
|
||||||
\ fragment length."
|
|
||||||
info: null
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--output"
|
name: "--output_dir"
|
||||||
description: "Kallisto quant results"
|
alternatives:
|
||||||
|
- "-o"
|
||||||
|
description: "Directory to write output to."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.kallisto_quant_results"
|
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: true
|
||||||
direction: "output"
|
direction: "output"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
@@ -96,73 +45,114 @@ argument_groups:
|
|||||||
name: "--log"
|
name: "--log"
|
||||||
description: "File containing log information from running kallisto quant"
|
description: "File containing log information from running kallisto quant"
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.kallisto_quant.log.txt"
|
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
direction: "output"
|
direction: "output"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- name: "Options"
|
||||||
name: "--run_info"
|
arguments:
|
||||||
description: "A json file containing information about the run"
|
- type: "boolean_true"
|
||||||
|
name: "--single"
|
||||||
|
description: "Single end mode."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--single_overhang"
|
||||||
|
description: "Include reads where unobserved rest of fragment is predicted to\
|
||||||
|
\ lie outside a transcript."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--fr_stranded"
|
||||||
|
description: "Strand specific reads, first read forward."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--rf_stranded"
|
||||||
|
description: "Strand specific reads, first read reverse."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "double"
|
||||||
|
name: "--fragment_length"
|
||||||
|
alternatives:
|
||||||
|
- "-l"
|
||||||
|
description: "The estimated average fragment length."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.run_info.json"
|
|
||||||
must_exist: true
|
|
||||||
create_parent: true
|
|
||||||
required: false
|
required: false
|
||||||
direction: "output"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "double"
|
||||||
name: "--quant_results_file"
|
name: "--sd"
|
||||||
description: "TSV file containing abundance estimates from Kallisto"
|
alternatives:
|
||||||
|
- "-s"
|
||||||
|
description: "The estimated standard deviation of the fragment length (default:\
|
||||||
|
\ -l, -s values are estimated \nfrom paired end data, but are required when\
|
||||||
|
\ using --single).\n"
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.abundance.tsv"
|
|
||||||
must_exist: true
|
|
||||||
create_parent: true
|
|
||||||
required: false
|
required: false
|
||||||
direction: "output"
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--plaintext"
|
||||||
|
description: "Output plaintext instead of HDF5."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bootstrap_samples"
|
||||||
|
alternatives:
|
||||||
|
- "-b"
|
||||||
|
description: "Number of bootstrap samples to draw. Default: '0'\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--seed"
|
||||||
|
description: "Random seed for bootstrap. Default: '42'\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 42
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
resources:
|
resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "script.sh"
|
path: "script.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
description: "Computes equivalence classes for reads and quantifies abundances.\n"
|
description: "Quantifying abundances of transcripts from RNA-Seq data, or more generally\
|
||||||
|
\ of target sequences using high-throughput sequencing reads.\n"
|
||||||
test_resources:
|
test_resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "test.sh"
|
path: "test.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
- type: "file"
|
- type: "file"
|
||||||
path: "transcriptome.fasta"
|
path: "test_data"
|
||||||
- type: "file"
|
info: null
|
||||||
path: "SRR6357070_1.fastq.gz"
|
|
||||||
- type: "file"
|
|
||||||
path: "SRR6357070_2.fastq.gz"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
|
||||||
paths:
|
|
||||||
- "modules/nf-core/kallisto/quant/main.nf"
|
|
||||||
- "modules/nf-core/kallisto/quant/meta.yml"
|
|
||||||
last_sha: "aff1d2e02717247831644769fc3ba84868c3fdde"
|
|
||||||
status: "enabled"
|
status: "enabled"
|
||||||
requirements:
|
requirements:
|
||||||
commands:
|
commands:
|
||||||
- "ps"
|
- "ps"
|
||||||
repositories:
|
keywords:
|
||||||
- type: "vsh"
|
- "kallisto"
|
||||||
name: "biobox"
|
- "quant"
|
||||||
repo: "vsh/biobox"
|
- "pseudoalignment"
|
||||||
tag: "main"
|
license: "BSD 2-Clause License"
|
||||||
- type: "vsh"
|
references:
|
||||||
name: "craftbox"
|
doi:
|
||||||
repo: "craftbox"
|
- "10.1038/nbt.3519"
|
||||||
tag: "v0.1.0"
|
links:
|
||||||
|
repository: "https://github.com/pachterlab/kallisto"
|
||||||
|
homepage: "https://pachterlab.github.io/kallisto/about"
|
||||||
|
documentation: "https://pachterlab.github.io/kallisto/manual"
|
||||||
|
issue_tracker: "https://github.com/pachterlab/kallisto/issues"
|
||||||
runners:
|
runners:
|
||||||
- type: "executable"
|
- type: "executable"
|
||||||
id: "executable"
|
id: "executable"
|
||||||
@@ -242,6 +232,9 @@ engines:
|
|||||||
\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\
|
\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz\
|
||||||
\ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\
|
\ && \\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\nmv kallisto/kallisto\
|
||||||
\ /usr/local/bin/\n"
|
\ /usr/local/bin/\n"
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "echo \"kallisto: $(kallisto version | sed 's/kallisto, version //')\" > /var/software_versions.txt\n"
|
||||||
entrypoint: []
|
entrypoint: []
|
||||||
cmd: null
|
cmd: null
|
||||||
- type: "native"
|
- type: "native"
|
||||||
@@ -253,31 +246,28 @@ build_info:
|
|||||||
output: "target/nextflow/kallisto/kallisto_quant"
|
output: "target/nextflow/kallisto/kallisto_quant"
|
||||||
executable: "target/nextflow/kallisto/kallisto_quant/main.nf"
|
executable: "target/nextflow/kallisto/kallisto_quant/main.nf"
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
package_config:
|
package_config:
|
||||||
name: "rnaseq"
|
name: "biobox"
|
||||||
version: "main"
|
version: "main"
|
||||||
info:
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
test_resources:
|
info: null
|
||||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
|
||||||
dest: "testData"
|
|
||||||
repositories:
|
|
||||||
- type: "vsh"
|
|
||||||
name: "biobox"
|
|
||||||
repo: "vsh/biobox"
|
|
||||||
tag: "main"
|
|
||||||
- type: "vsh"
|
|
||||||
name: "craftbox"
|
|
||||||
repo: "craftbox"
|
|
||||||
tag: "v0.1.0"
|
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
source: "src"
|
source: "src"
|
||||||
target: "target"
|
target: "target"
|
||||||
config_mods:
|
config_mods:
|
||||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
- ".requirements.commands := ['ps']\n"
|
||||||
\ := '$id'\n"
|
|
||||||
- ".engines += { type: \"native\" }"
|
- ".engines += { type: \"native\" }"
|
||||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
organization: "vsh"
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -2817,76 +2817,21 @@ meta = [
|
|||||||
"description" : "List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.",
|
"description" : "List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively.",
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : true,
|
"multiple" : true,
|
||||||
"multiple_sep" : ","
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "boolean",
|
|
||||||
"name" : "--paired",
|
|
||||||
"description" : "Paired reads or not.",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "string",
|
|
||||||
"name" : "--strandedness",
|
|
||||||
"description" : "Sample strand-specificity.",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--index",
|
"name" : "--index",
|
||||||
|
"alternatives" : [
|
||||||
|
"-i"
|
||||||
|
],
|
||||||
"description" : "Kallisto genome index.",
|
"description" : "Kallisto genome index.",
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"name" : "--gtf",
|
|
||||||
"description" : "Optional gtf file for translation of transcripts into genomic coordinates.",
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"name" : "--chromosomes",
|
|
||||||
"description" : "Optional tab separated file with chromosome names and lengths.",
|
|
||||||
"must_exist" : true,
|
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "integer",
|
|
||||||
"name" : "--fragment_length",
|
|
||||||
"description" : "For single-end mode only, the estimated average fragment length.",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "integer",
|
|
||||||
"name" : "--fragment_length_sd",
|
|
||||||
"description" : "For single-end mode only, the estimated standard deviation of the fragment length.",
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
@@ -2898,14 +2843,14 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--output",
|
"name" : "--output_dir",
|
||||||
"description" : "Kallisto quant results",
|
"alternatives" : [
|
||||||
"default" : [
|
"-o"
|
||||||
"$id.kallisto_quant_results"
|
|
||||||
],
|
],
|
||||||
|
"description" : "Directory to write output to.",
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : true,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
@@ -2914,41 +2859,96 @@ meta = [
|
|||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--log",
|
"name" : "--log",
|
||||||
"description" : "File containing log information from running kallisto quant",
|
"description" : "File containing log information from running kallisto quant",
|
||||||
"default" : [
|
|
||||||
"$id.kallisto_quant.log.txt"
|
|
||||||
],
|
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name" : "Options",
|
||||||
|
"arguments" : [
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--single",
|
||||||
|
"description" : "Single end mode.",
|
||||||
|
"direction" : "input"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "boolean_true",
|
||||||
"name" : "--run_info",
|
"name" : "--single_overhang",
|
||||||
"description" : "A json file containing information about the run",
|
"description" : "Include reads where unobserved rest of fragment is predicted to lie outside a transcript.",
|
||||||
"default" : [
|
"direction" : "input"
|
||||||
"$id.run_info.json"
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--fr_stranded",
|
||||||
|
"description" : "Strand specific reads, first read forward.",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--rf_stranded",
|
||||||
|
"description" : "Strand specific reads, first read reverse.",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "double",
|
||||||
|
"name" : "--fragment_length",
|
||||||
|
"alternatives" : [
|
||||||
|
"-l"
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"description" : "The estimated average fragment length.",
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "double",
|
||||||
"name" : "--quant_results_file",
|
"name" : "--sd",
|
||||||
"description" : "TSV file containing abundance estimates from Kallisto",
|
"alternatives" : [
|
||||||
"default" : [
|
"-s"
|
||||||
"$id.abundance.tsv"
|
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"description" : "The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n",
|
||||||
"create_parent" : true,
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "boolean_true",
|
||||||
|
"name" : "--plaintext",
|
||||||
|
"description" : "Output plaintext instead of HDF5.",
|
||||||
|
"direction" : "input"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--bootstrap_samples",
|
||||||
|
"alternatives" : [
|
||||||
|
"-b"
|
||||||
|
],
|
||||||
|
"description" : "Number of bootstrap samples to draw. Default: '0'\n",
|
||||||
|
"example" : [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "integer",
|
||||||
|
"name" : "--seed",
|
||||||
|
"description" : "Random seed for bootstrap. Default: '42'\n",
|
||||||
|
"example" : [
|
||||||
|
42
|
||||||
|
],
|
||||||
|
"required" : false,
|
||||||
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
}
|
}
|
||||||
@@ -2962,7 +2962,7 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "Computes equivalence classes for reads and quantifies abundances.\n",
|
"description" : "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
@@ -2971,47 +2971,32 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"path" : "/testData/minimal_test/reference/transcriptome.fasta"
|
"path" : "test_data"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_1.fastq.gz"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/minimal_test/input_fastq/SRR6357070_2.fastq.gz"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/kallisto/quant/main.nf",
|
|
||||||
"modules/nf-core/kallisto/quant/meta.yml"
|
|
||||||
],
|
|
||||||
"last_sha" : "aff1d2e02717247831644769fc3ba84868c3fdde"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"keywords" : [
|
||||||
{
|
"kallisto",
|
||||||
"type" : "vsh",
|
"quant",
|
||||||
"name" : "biobox",
|
"pseudoalignment"
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
|
"license" : "BSD 2-Clause License",
|
||||||
|
"references" : {
|
||||||
|
"doi" : [
|
||||||
|
"10.1038/nbt.3519"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/pachterlab/kallisto",
|
||||||
|
"homepage" : "https://pachterlab.github.io/kallisto/about",
|
||||||
|
"documentation" : "https://pachterlab.github.io/kallisto/manual",
|
||||||
|
"issue_tracker" : "https://github.com/pachterlab/kallisto/issues"
|
||||||
|
},
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3100,6 +3085,12 @@ meta = [
|
|||||||
"run" : [
|
"run" : [
|
||||||
"apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n"
|
"apt-get update && \\\\\napt-get install -y --no-install-recommends wget && \\\\\nwget --no-check-certificate https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \\\\\ntar -xzf kallisto_linux-v0.50.1.tar.gz && \\\\\nmv kallisto/kallisto /usr/local/bin/\n"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "docker",
|
||||||
|
"run" : [
|
||||||
|
"echo \\"kallisto: $(kallisto version | sed 's/kallisto, version //')\\" > /var/software_versions.txt\n"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -3112,46 +3103,36 @@ meta = [
|
|||||||
"config" : "/workdir/root/repo/src/kallisto/kallisto_quant/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/kallisto/kallisto_quant/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/kallisto/kallisto_quant",
|
"output" : "target/nextflow/kallisto/kallisto_quant",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3165,20 +3146,23 @@ def innerWorkflowFactory(args) {
|
|||||||
def rawScript = '''set -e
|
def rawScript = '''set -e
|
||||||
tempscript=".viash_script.sh"
|
tempscript=".viash_script.sh"
|
||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_STRANDEDNESS+x} ]; then echo "${VIASH_PAR_STRANDEDNESS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_strandedness='&'#" ; else echo "# par_strandedness="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
|
$( if [ ! -z ${VIASH_PAR_INDEX+x} ]; then echo "${VIASH_PAR_INDEX}" | sed "s#'#'\\"'\\"'#g;s#.*#par_index='&'#" ; else echo "# par_index="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi )
|
$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_CHROMOSOMES+x} ]; then echo "${VIASH_PAR_CHROMOSOMES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_chromosomes='&'#" ; else echo "# par_chromosomes="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH_SD+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length_sd='&'#" ; else echo "# par_fragment_length_sd="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
|
|
||||||
$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi )
|
$( if [ ! -z ${VIASH_PAR_LOG+x} ]; then echo "${VIASH_PAR_LOG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_log='&'#" ; else echo "# par_log="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_RUN_INFO+x} ]; then echo "${VIASH_PAR_RUN_INFO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_run_info='&'#" ; else echo "# par_run_info="; fi )
|
$( if [ ! -z ${VIASH_PAR_SINGLE+x} ]; then echo "${VIASH_PAR_SINGLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single='&'#" ; else echo "# par_single="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_QUANT_RESULTS_FILE+x} ]; then echo "${VIASH_PAR_QUANT_RESULTS_FILE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quant_results_file='&'#" ; else echo "# par_quant_results_file="; fi )
|
$( if [ ! -z ${VIASH_PAR_SINGLE_OVERHANG+x} ]; then echo "${VIASH_PAR_SINGLE_OVERHANG}" | sed "s#'#'\\"'\\"'#g;s#.*#par_single_overhang='&'#" ; else echo "# par_single_overhang="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_FR_STRANDED+x} ]; then echo "${VIASH_PAR_FR_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fr_stranded='&'#" ; else echo "# par_fr_stranded="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_RF_STRANDED+x} ]; then echo "${VIASH_PAR_RF_STRANDED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_rf_stranded='&'#" ; else echo "# par_rf_stranded="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_FRAGMENT_LENGTH+x} ]; then echo "${VIASH_PAR_FRAGMENT_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_fragment_length='&'#" ; else echo "# par_fragment_length="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_SD+x} ]; then echo "${VIASH_PAR_SD}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sd='&'#" ; else echo "# par_sd="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_PLAINTEXT+x} ]; then echo "${VIASH_PAR_PLAINTEXT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_plaintext='&'#" ; else echo "# par_plaintext="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_BOOTSTRAP_SAMPLES+x} ]; then echo "${VIASH_PAR_BOOTSTRAP_SAMPLES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bootstrap_samples='&'#" ; else echo "# par_bootstrap_samples="; fi )
|
||||||
|
$( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "${VIASH_PAR_SEED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_seed='&'#" ; else echo "# par_seed="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||||
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
|
||||||
@@ -3199,46 +3183,46 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
|
|||||||
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
|
||||||
|
|
||||||
## VIASH END
|
## VIASH END
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
IFS="," read -ra input <<< \\$par_input
|
unset_if_false=( par_single par_single_overhang par_rf_stranded par_fr_stranded par_plaintext )
|
||||||
|
|
||||||
single_end_params=''
|
for var in "\\${unset_if_false[@]}"; do
|
||||||
if [ \\$par_paired == "false" ]; then
|
temp_var="\\${!var}"
|
||||||
if [[ \\$par_fragment_length < 0 ]] || [[ ! \\$fragment_length_sd < 0 ]]; then
|
[[ "\\$temp_var" == "false" ]] && unset \\$var
|
||||||
echo "fragment_length and fragment_length_sd must be set for single-end data"
|
done
|
||||||
|
|
||||||
|
IFS=";" read -ra input <<< \\$par_input
|
||||||
|
|
||||||
|
# Check if par_single is not set and ensure even number of input files
|
||||||
|
if [ -z "\\$par_single" ]; then
|
||||||
|
if [ \\$((\\${#input[@]} % 2)) -ne 0 ]; then
|
||||||
|
echo "Error: When running in paired-end mode, the number of input files must be even."
|
||||||
|
echo "Number of input files provided: \\${#input[@]}"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
single_end_params="--single --fragment-length \\$par_fragment_length --sd \\$par_fragment_length_sd"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
strandedness=''
|
|
||||||
if [[ "\\$par_extra_args" != *"--fr-stranded"* ]] && [[ "\\$par_extra_args" != *"--rf-stranded"* ]]; then
|
|
||||||
if [ "\\$par_strandedness" == 'forward' ]; then
|
|
||||||
strandedness='--fr-stranded'
|
|
||||||
elif [ "\\$par_strandedness" == 'reverse' ]; then
|
|
||||||
strandedness='--rf-stranded'
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
mkdir -p \\$par_output
|
mkdir -p \\$par_output_dir
|
||||||
|
|
||||||
|
|
||||||
kallisto quant \\\\
|
kallisto quant \\\\
|
||||||
\\${meta_cpus:+--threads \\$meta_cpus} \\\\
|
\\${meta_cpus:+--threads \\$meta_cpus} \\\\
|
||||||
--index \\$par_index \\\\
|
-i \\$par_index \\\\
|
||||||
\\${par_gtf:+--gtf \\$par_gtf} \\\\
|
\\${par_gtf:+--gtf "\\${par_gtf}"} \\\\
|
||||||
\\${par_chromosomes:+--chromosomes \\$par_chromosomes} \\\\
|
\\${par_single:+--single} \\\\
|
||||||
\\$single_end_params \\\\
|
\\${par_single_overhang:+--single-overhang} \\\\
|
||||||
\\$strandedness \\\\
|
\\${par_fr_stranded:+--fr-stranded} \\\\
|
||||||
\\$par_extra_args \\\\
|
\\${par_rf_stranded:+--rf-stranded} \\\\
|
||||||
-o \\$par_output \\\\
|
\\${par_plaintext:+--plaintext} \\\\
|
||||||
\\${input[*]} 2> >(tee -a \\${par_output}/kallisto_quant.log >&2)
|
\\${par_bootstrap_samples:+--bootstrap-samples "\\${par_bootstrap_samples}"} \\\\
|
||||||
|
\\${par_fragment_length:+--fragment-length "\\${par_fragment_length}"} \\\\
|
||||||
mv \\${par_output}/kallisto_quant.log \\${par_log}
|
\\${par_sd:+--sd "\\${par_sd}"} \\\\
|
||||||
mv \\${par_output}/run_info.json \\${par_run_info}
|
\\${par_seed:+--seed "\\${par_seed}"} \\\\
|
||||||
cp \\${par_output}/abundance.tsv \\${par_quant_results_file}
|
-o \\$par_output_dir \\\\
|
||||||
|
\\${input[*]} 2> >(tee -a \\$par_log >&2)
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3599,7 +3583,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/kallisto/kallisto_quant",
|
"image" : "vsh/biobox/kallisto/kallisto_quant",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -0,0 +1,125 @@
|
|||||||
|
manifest {
|
||||||
|
name = 'kallisto/kallisto_quant'
|
||||||
|
mainScript = 'main.nf'
|
||||||
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
|
version = 'main'
|
||||||
|
description = 'Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n'
|
||||||
|
}
|
||||||
|
|
||||||
|
process.container = 'nextflow/bash:latest'
|
||||||
|
|
||||||
|
// detect tempdir
|
||||||
|
tempDir = java.nio.file.Paths.get(
|
||||||
|
System.getenv('NXF_TEMP') ?:
|
||||||
|
System.getenv('VIASH_TEMP') ?:
|
||||||
|
System.getenv('TEMPDIR') ?:
|
||||||
|
System.getenv('TMPDIR') ?:
|
||||||
|
'/tmp'
|
||||||
|
).toAbsolutePath()
|
||||||
|
|
||||||
|
profiles {
|
||||||
|
no_publish {
|
||||||
|
process {
|
||||||
|
withName: '.*' {
|
||||||
|
publishDir = [
|
||||||
|
enabled: false
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mount_temp {
|
||||||
|
docker.temp = tempDir
|
||||||
|
podman.temp = tempDir
|
||||||
|
charliecloud.temp = tempDir
|
||||||
|
}
|
||||||
|
docker {
|
||||||
|
docker.enabled = true
|
||||||
|
// docker.userEmulation = true
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
singularity {
|
||||||
|
singularity.enabled = true
|
||||||
|
singularity.autoMounts = true
|
||||||
|
docker.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
podman {
|
||||||
|
podman.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
shifter {
|
||||||
|
shifter.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
charliecloud.enabled = false
|
||||||
|
}
|
||||||
|
charliecloud {
|
||||||
|
charliecloud.enabled = true
|
||||||
|
docker.enabled = false
|
||||||
|
singularity.enabled = false
|
||||||
|
podman.enabled = false
|
||||||
|
shifter.enabled = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process{
|
||||||
|
withLabel: mem1gb { memory = 1000000000.B }
|
||||||
|
withLabel: mem2gb { memory = 2000000000.B }
|
||||||
|
withLabel: mem5gb { memory = 5000000000.B }
|
||||||
|
withLabel: mem10gb { memory = 10000000000.B }
|
||||||
|
withLabel: mem20gb { memory = 20000000000.B }
|
||||||
|
withLabel: mem50gb { memory = 50000000000.B }
|
||||||
|
withLabel: mem100gb { memory = 100000000000.B }
|
||||||
|
withLabel: mem200gb { memory = 200000000000.B }
|
||||||
|
withLabel: mem500gb { memory = 500000000000.B }
|
||||||
|
withLabel: mem1tb { memory = 1000000000000.B }
|
||||||
|
withLabel: mem2tb { memory = 2000000000000.B }
|
||||||
|
withLabel: mem5tb { memory = 5000000000000.B }
|
||||||
|
withLabel: mem10tb { memory = 10000000000000.B }
|
||||||
|
withLabel: mem20tb { memory = 20000000000000.B }
|
||||||
|
withLabel: mem50tb { memory = 50000000000000.B }
|
||||||
|
withLabel: mem100tb { memory = 100000000000000.B }
|
||||||
|
withLabel: mem200tb { memory = 200000000000000.B }
|
||||||
|
withLabel: mem500tb { memory = 500000000000000.B }
|
||||||
|
withLabel: mem1gib { memory = 1073741824.B }
|
||||||
|
withLabel: mem2gib { memory = 2147483648.B }
|
||||||
|
withLabel: mem4gib { memory = 4294967296.B }
|
||||||
|
withLabel: mem8gib { memory = 8589934592.B }
|
||||||
|
withLabel: mem16gib { memory = 17179869184.B }
|
||||||
|
withLabel: mem32gib { memory = 34359738368.B }
|
||||||
|
withLabel: mem64gib { memory = 68719476736.B }
|
||||||
|
withLabel: mem128gib { memory = 137438953472.B }
|
||||||
|
withLabel: mem256gib { memory = 274877906944.B }
|
||||||
|
withLabel: mem512gib { memory = 549755813888.B }
|
||||||
|
withLabel: mem1tib { memory = 1099511627776.B }
|
||||||
|
withLabel: mem2tib { memory = 2199023255552.B }
|
||||||
|
withLabel: mem4tib { memory = 4398046511104.B }
|
||||||
|
withLabel: mem8tib { memory = 8796093022208.B }
|
||||||
|
withLabel: mem16tib { memory = 17592186044416.B }
|
||||||
|
withLabel: mem32tib { memory = 35184372088832.B }
|
||||||
|
withLabel: mem64tib { memory = 70368744177664.B }
|
||||||
|
withLabel: mem128tib { memory = 140737488355328.B }
|
||||||
|
withLabel: mem256tib { memory = 281474976710656.B }
|
||||||
|
withLabel: mem512tib { memory = 562949953421312.B }
|
||||||
|
withLabel: cpu1 { cpus = 1 }
|
||||||
|
withLabel: cpu2 { cpus = 2 }
|
||||||
|
withLabel: cpu5 { cpus = 5 }
|
||||||
|
withLabel: cpu10 { cpus = 10 }
|
||||||
|
withLabel: cpu20 { cpus = 20 }
|
||||||
|
withLabel: cpu50 { cpus = 50 }
|
||||||
|
withLabel: cpu100 { cpus = 100 }
|
||||||
|
withLabel: cpu200 { cpus = 200 }
|
||||||
|
withLabel: cpu500 { cpus = 500 }
|
||||||
|
withLabel: cpu1000 { cpus = 1000 }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,225 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
|
"title": "kallisto_quant",
|
||||||
|
"description": "Quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads.\n",
|
||||||
|
"type": "object",
|
||||||
|
"definitions": {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"input" : {
|
||||||
|
"title": "Input",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"input": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively",
|
||||||
|
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"index": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, required. Kallisto genome index",
|
||||||
|
"help_text": "Type: `file`, required. Kallisto genome index."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"output" : {
|
||||||
|
"title": "Output",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"output_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to",
|
||||||
|
"help_text": "Type: `file`, required, default: `$id.$key.output_dir.output_dir`. Directory to write output to."
|
||||||
|
,
|
||||||
|
"default": "$id.$key.output_dir.output_dir"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"log": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.log.log`. File containing log information from running kallisto quant"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.log.log"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"options" : {
|
||||||
|
"title": "Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"single": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Single end mode",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Single end mode."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"single_overhang": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Include reads where unobserved rest of fragment is predicted to lie outside a transcript."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fr_stranded": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read forward."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"rf_stranded": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Strand specific reads, first read reverse."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fragment_length": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`. The estimated average fragment length",
|
||||||
|
"help_text": "Type: `double`. The estimated average fragment length."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"sd": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single)",
|
||||||
|
"help_text": "Type: `double`. The estimated standard deviation of the fragment length (default: -l, -s values are estimated \nfrom paired end data, but are required when using --single).\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"plaintext": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Output plaintext instead of HDF5."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bootstrap_samples": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `0`. Number of bootstrap samples to draw",
|
||||||
|
"help_text": "Type: `integer`, example: `0`. Number of bootstrap samples to draw. Default: \u00270\u0027\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"seed": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `42`. Random seed for bootstrap",
|
||||||
|
"help_text": "Type: `integer`, example: `42`. Random seed for bootstrap. Default: \u002742\u0027\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"nextflow input-output arguments" : {
|
||||||
|
"title": "Nextflow input-output arguments",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"publish_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||||
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"param_list": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||||
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||||
|
"hidden": true
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/input"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/output"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1,12 +1,30 @@
|
|||||||
name: "qualimap"
|
name: "qualimap_rnaseq"
|
||||||
|
namespace: "qualimap"
|
||||||
version: "main"
|
version: "main"
|
||||||
|
authors:
|
||||||
|
- name: "Dorien Roosen"
|
||||||
|
roles:
|
||||||
|
- "author"
|
||||||
|
- "maintainer"
|
||||||
|
info:
|
||||||
|
links:
|
||||||
|
email: "dorien@data-intuitive.com"
|
||||||
|
github: "dorien-er"
|
||||||
|
linkedin: "dorien-roosen"
|
||||||
|
organizations:
|
||||||
|
- name: "Data Intuitive"
|
||||||
|
href: "https://www.data-intuitive.com"
|
||||||
|
role: "Data Scientist"
|
||||||
argument_groups:
|
argument_groups:
|
||||||
- name: "Input"
|
- name: "Input"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--input"
|
name: "--bam"
|
||||||
description: "path to input mapping file in BAM format."
|
description: "Path to the sequence alignment file in BAM format, produced by a\
|
||||||
|
\ splicing-aware aligner."
|
||||||
info: null
|
info: null
|
||||||
|
example:
|
||||||
|
- "alignment.bam"
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: true
|
required: true
|
||||||
@@ -15,8 +33,10 @@ argument_groups:
|
|||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--gtf"
|
name: "--gtf"
|
||||||
description: "path to annotations file in Ensembl GTF format."
|
description: "Path to genomic annotations in Ensembl GTF format."
|
||||||
info: null
|
info: null
|
||||||
|
example:
|
||||||
|
- "annotations.gtf"
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: true
|
required: true
|
||||||
@@ -26,11 +46,21 @@ argument_groups:
|
|||||||
- name: "Output"
|
- name: "Output"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--output_dir"
|
name: "--qc_results"
|
||||||
description: "path to output directory for raw data and report."
|
description: "Text file containing the RNAseq QC results."
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "rnaseq_qc_results.txt"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: true
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--counts"
|
||||||
|
description: "Output file for computed counts."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "$id.qualimap_output"
|
|
||||||
must_exist: true
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
@@ -38,48 +68,34 @@ argument_groups:
|
|||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "file"
|
- type: "file"
|
||||||
name: "--output_pdf"
|
name: "--report"
|
||||||
description: "path to output file for pdf report."
|
description: "Report output file. Supported formats are PDF or HTML."
|
||||||
info: null
|
info: null
|
||||||
default:
|
example:
|
||||||
- "$id.report.pdf"
|
- "report.html"
|
||||||
must_exist: false
|
must_exist: true
|
||||||
create_parent: true
|
create_parent: true
|
||||||
required: false
|
required: false
|
||||||
direction: "output"
|
direction: "output"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "string"
|
|
||||||
name: "--output_format"
|
|
||||||
description: "Format of the output report (PDF or HTML, default is HTML)"
|
|
||||||
info: null
|
|
||||||
default:
|
|
||||||
- "html"
|
|
||||||
required: false
|
|
||||||
direction: "input"
|
|
||||||
multiple: false
|
|
||||||
multiple_sep: ";"
|
|
||||||
- name: "Optional"
|
- name: "Optional"
|
||||||
arguments:
|
arguments:
|
||||||
- type: "integer"
|
- type: "integer"
|
||||||
name: "--pr_bases"
|
name: "--num_pr_bases"
|
||||||
description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\
|
description: "Number of upstream/downstream nucleotide bases to compute 5'-3'\
|
||||||
\ bias (default = 100)."
|
\ bias (default = 100)."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- 100
|
|
||||||
required: false
|
required: false
|
||||||
min: 1
|
min: 1
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
- type: "integer"
|
- type: "integer"
|
||||||
name: "--tr_bias"
|
name: "--num_tr_bias"
|
||||||
description: "Number of top highly expressed transcripts to compute 5'-3' bias\
|
description: "Number of top highly expressed transcripts to compute 5'-3' bias\
|
||||||
\ (default = 1000)."
|
\ (default = 1000)."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- 1000
|
|
||||||
required: false
|
required: false
|
||||||
min: 1
|
min: 1
|
||||||
direction: "input"
|
direction: "input"
|
||||||
@@ -89,9 +105,10 @@ argument_groups:
|
|||||||
name: "--algorithm"
|
name: "--algorithm"
|
||||||
description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)."
|
description: "Counting algorithm (uniquely-mapped-reads (default) or proportional)."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "uniquely-mapped-reads"
|
|
||||||
required: false
|
required: false
|
||||||
|
choices:
|
||||||
|
- "uniquely-mapped-reads"
|
||||||
|
- "proportional"
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
multiple_sep: ";"
|
multiple_sep: ";"
|
||||||
@@ -100,8 +117,6 @@ argument_groups:
|
|||||||
description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\
|
description: "Sequencing library protocol (strand-specific-forward, strand-specific-reverse\
|
||||||
\ or non-strand-specific (default))."
|
\ or non-strand-specific (default))."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "non-strand-specific"
|
|
||||||
required: false
|
required: false
|
||||||
choices:
|
choices:
|
||||||
- "non-strand-specific"
|
- "non-strand-specific"
|
||||||
@@ -127,8 +142,6 @@ argument_groups:
|
|||||||
name: "--java_memory_size"
|
name: "--java_memory_size"
|
||||||
description: "maximum Java heap memory size, default = 4G."
|
description: "maximum Java heap memory size, default = 4G."
|
||||||
info: null
|
info: null
|
||||||
default:
|
|
||||||
- "4G"
|
|
||||||
required: false
|
required: false
|
||||||
direction: "input"
|
direction: "input"
|
||||||
multiple: false
|
multiple: false
|
||||||
@@ -137,36 +150,33 @@ resources:
|
|||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "script.sh"
|
path: "script.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
description: "RNA-seq QC analysis using the qualimap \n"
|
description: "Qualimap RNA-seq QC reports quality control metrics and bias estimations\
|
||||||
|
\ \nwhich are specific for whole transcriptome sequencing, including reads genomic\
|
||||||
|
\ \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n"
|
||||||
test_resources:
|
test_resources:
|
||||||
- type: "bash_script"
|
- type: "bash_script"
|
||||||
path: "test.sh"
|
path: "test.sh"
|
||||||
is_executable: true
|
is_executable: true
|
||||||
- type: "file"
|
- type: "file"
|
||||||
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
|
path: "test_data"
|
||||||
- type: "file"
|
info: null
|
||||||
path: "wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai"
|
|
||||||
- type: "file"
|
|
||||||
path: "genes.gtf"
|
|
||||||
info:
|
|
||||||
migration_info:
|
|
||||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
|
||||||
paths:
|
|
||||||
- "modules/nf-core/qualimap/rnaseq/main.nf"
|
|
||||||
last_sha: "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
|
||||||
status: "enabled"
|
status: "enabled"
|
||||||
requirements:
|
requirements:
|
||||||
commands:
|
commands:
|
||||||
- "ps"
|
- "ps"
|
||||||
repositories:
|
keywords:
|
||||||
- type: "vsh"
|
- "RNA-seq"
|
||||||
name: "biobox"
|
- "quality control"
|
||||||
repo: "vsh/biobox"
|
- "QC Report"
|
||||||
tag: "main"
|
license: "GPL-2.0"
|
||||||
- type: "vsh"
|
references:
|
||||||
name: "craftbox"
|
doi:
|
||||||
repo: "craftbox"
|
- "10.1093/bioinformatics/btv566"
|
||||||
tag: "v0.1.0"
|
links:
|
||||||
|
repository: "https://bitbucket.org/kokonech/qualimap/commits/branch/master"
|
||||||
|
homepage: "http://qualimap.conesalab.org/"
|
||||||
|
documentation: "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc"
|
||||||
|
issue_tracker: "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open"
|
||||||
runners:
|
runners:
|
||||||
- type: "executable"
|
- type: "executable"
|
||||||
id: "executable"
|
id: "executable"
|
||||||
@@ -235,67 +245,47 @@ runners:
|
|||||||
engines:
|
engines:
|
||||||
- type: "docker"
|
- type: "docker"
|
||||||
id: "docker"
|
id: "docker"
|
||||||
image: "ubuntu:22.04"
|
image: "quay.io/biocontainers/qualimap:2.3--hdfd78af_0"
|
||||||
target_registry: "images.viash-hub.com"
|
target_registry: "images.viash-hub.com"
|
||||||
target_tag: "main"
|
target_tag: "main"
|
||||||
namespace_separator: "/"
|
namespace_separator: "/"
|
||||||
setup:
|
setup:
|
||||||
- type: "apt"
|
|
||||||
packages:
|
|
||||||
- "r-base"
|
|
||||||
- "unzip"
|
|
||||||
- "wget"
|
|
||||||
- "openjdk-8-jdk"
|
|
||||||
- "libxml2-dev"
|
|
||||||
- "libcurl4-openssl-dev"
|
|
||||||
interactive: false
|
|
||||||
- type: "docker"
|
- type: "docker"
|
||||||
run:
|
run:
|
||||||
- "wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip &&\
|
- "echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n"
|
||||||
\ \\\nunzip qualimap_v2.3.zip && \\\ncp -a qualimap_v2.3/. usr/bin && \\\nunset\
|
|
||||||
\ DISPLAY && \\\nmkdir -p tmp && \\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n"
|
|
||||||
- type: "r"
|
|
||||||
cran:
|
|
||||||
- "optparse"
|
|
||||||
bioc:
|
|
||||||
- "NOISeqr"
|
|
||||||
bioc_force_install: false
|
|
||||||
entrypoint: []
|
entrypoint: []
|
||||||
cmd: null
|
cmd: null
|
||||||
- type: "native"
|
- type: "native"
|
||||||
id: "native"
|
id: "native"
|
||||||
build_info:
|
build_info:
|
||||||
config: "src/qualimap/config.vsh.yaml"
|
config: "src/qualimap/qualimap_rnaseq/config.vsh.yaml"
|
||||||
runner: "nextflow"
|
runner: "nextflow"
|
||||||
engine: "docker|native"
|
engine: "docker|native"
|
||||||
output: "target/nextflow/qualimap"
|
output: "target/nextflow/qualimap/qualimap_rnaseq"
|
||||||
executable: "target/nextflow/qualimap/main.nf"
|
executable: "target/nextflow/qualimap/qualimap_rnaseq/main.nf"
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
git_commit: "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3"
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
git_remote: "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
package_config:
|
package_config:
|
||||||
name: "rnaseq"
|
name: "biobox"
|
||||||
version: "main"
|
version: "main"
|
||||||
info:
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
test_resources:
|
info: null
|
||||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
|
||||||
dest: "testData"
|
|
||||||
repositories:
|
|
||||||
- type: "vsh"
|
|
||||||
name: "biobox"
|
|
||||||
repo: "vsh/biobox"
|
|
||||||
tag: "main"
|
|
||||||
- type: "vsh"
|
|
||||||
name: "craftbox"
|
|
||||||
repo: "craftbox"
|
|
||||||
tag: "v0.1.0"
|
|
||||||
viash_version: "0.9.0"
|
viash_version: "0.9.0"
|
||||||
source: "src"
|
source: "src"
|
||||||
target: "target"
|
target: "target"
|
||||||
config_mods:
|
config_mods:
|
||||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
- ".requirements.commands := ['ps']\n"
|
||||||
\ := '$id'\n"
|
|
||||||
- ".engines += { type: \"native\" }"
|
- ".engines += { type: \"native\" }"
|
||||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
organization: "vsh"
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
// qualimap main
|
// qualimap_rnaseq main
|
||||||
//
|
//
|
||||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||||
@@ -8,6 +8,9 @@
|
|||||||
// authors of this component should specify the license in the header of such
|
// authors of this component should specify the license in the header of such
|
||||||
// files, or include a separate license file detailing the licenses of all included
|
// files, or include a separate license file detailing the licenses of all included
|
||||||
// files.
|
// files.
|
||||||
|
//
|
||||||
|
// Component authors:
|
||||||
|
// * Dorien Roosen (author, maintainer)
|
||||||
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// VDSL3 helper functions //
|
// VDSL3 helper functions //
|
||||||
@@ -2804,16 +2807,43 @@ nextflow.enable.dsl=2
|
|||||||
meta = [
|
meta = [
|
||||||
"resources_dir": moduleDir.toRealPath().normalize(),
|
"resources_dir": moduleDir.toRealPath().normalize(),
|
||||||
"config": processConfig(readJsonBlob('''{
|
"config": processConfig(readJsonBlob('''{
|
||||||
"name" : "qualimap",
|
"name" : "qualimap_rnaseq",
|
||||||
|
"namespace" : "qualimap",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
|
"authors" : [
|
||||||
|
{
|
||||||
|
"name" : "Dorien Roosen",
|
||||||
|
"roles" : [
|
||||||
|
"author",
|
||||||
|
"maintainer"
|
||||||
|
],
|
||||||
|
"info" : {
|
||||||
|
"links" : {
|
||||||
|
"email" : "dorien@data-intuitive.com",
|
||||||
|
"github" : "dorien-er",
|
||||||
|
"linkedin" : "dorien-roosen"
|
||||||
|
},
|
||||||
|
"organizations" : [
|
||||||
|
{
|
||||||
|
"name" : "Data Intuitive",
|
||||||
|
"href" : "https://www.data-intuitive.com",
|
||||||
|
"role" : "Data Scientist"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"argument_groups" : [
|
"argument_groups" : [
|
||||||
{
|
{
|
||||||
"name" : "Input",
|
"name" : "Input",
|
||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--input",
|
"name" : "--bam",
|
||||||
"description" : "path to input mapping file in BAM format.",
|
"description" : "Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner.",
|
||||||
|
"example" : [
|
||||||
|
"alignment.bam"
|
||||||
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : true,
|
"required" : true,
|
||||||
@@ -2824,7 +2854,10 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--gtf",
|
"name" : "--gtf",
|
||||||
"description" : "path to annotations file in Ensembl GTF format.",
|
"description" : "Path to genomic annotations in Ensembl GTF format.",
|
||||||
|
"example" : [
|
||||||
|
"annotations.gtf"
|
||||||
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : true,
|
"required" : true,
|
||||||
@@ -2839,13 +2872,24 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--output_dir",
|
"name" : "--qc_results",
|
||||||
"description" : "path to output directory for raw data and report.",
|
"description" : "Text file containing the RNAseq QC results.",
|
||||||
"default" : [
|
"example" : [
|
||||||
"$id.qualimap_output"
|
"rnaseq_qc_results.txt"
|
||||||
],
|
],
|
||||||
"must_exist" : true,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
|
"required" : true,
|
||||||
|
"direction" : "output",
|
||||||
|
"multiple" : false,
|
||||||
|
"multiple_sep" : ";"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type" : "file",
|
||||||
|
"name" : "--counts",
|
||||||
|
"description" : "Output file for computed counts.",
|
||||||
|
"must_exist" : true,
|
||||||
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
@@ -2853,29 +2897,17 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"name" : "--output_pdf",
|
"name" : "--report",
|
||||||
"description" : "path to output file for pdf report.",
|
"description" : "Report output file. Supported formats are PDF or HTML.",
|
||||||
"default" : [
|
"example" : [
|
||||||
"$id.report.pdf"
|
"report.html"
|
||||||
],
|
],
|
||||||
"must_exist" : false,
|
"must_exist" : true,
|
||||||
"create_parent" : true,
|
"create_parent" : true,
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "output",
|
"direction" : "output",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "string",
|
|
||||||
"name" : "--output_format",
|
|
||||||
"description" : "Format of the output report (PDF or HTML, default is HTML)",
|
|
||||||
"default" : [
|
|
||||||
"html"
|
|
||||||
],
|
|
||||||
"required" : false,
|
|
||||||
"direction" : "input",
|
|
||||||
"multiple" : false,
|
|
||||||
"multiple_sep" : ";"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -2884,11 +2916,8 @@ meta = [
|
|||||||
"arguments" : [
|
"arguments" : [
|
||||||
{
|
{
|
||||||
"type" : "integer",
|
"type" : "integer",
|
||||||
"name" : "--pr_bases",
|
"name" : "--num_pr_bases",
|
||||||
"description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).",
|
"description" : "Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100).",
|
||||||
"default" : [
|
|
||||||
100
|
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"min" : 1,
|
"min" : 1,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
@@ -2897,11 +2926,8 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "integer",
|
"type" : "integer",
|
||||||
"name" : "--tr_bias",
|
"name" : "--num_tr_bias",
|
||||||
"description" : "Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).",
|
"description" : "Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000).",
|
||||||
"default" : [
|
|
||||||
1000
|
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"min" : 1,
|
"min" : 1,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
@@ -2912,10 +2938,11 @@ meta = [
|
|||||||
"type" : "string",
|
"type" : "string",
|
||||||
"name" : "--algorithm",
|
"name" : "--algorithm",
|
||||||
"description" : "Counting algorithm (uniquely-mapped-reads (default) or proportional).",
|
"description" : "Counting algorithm (uniquely-mapped-reads (default) or proportional).",
|
||||||
"default" : [
|
|
||||||
"uniquely-mapped-reads"
|
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
|
"choices" : [
|
||||||
|
"uniquely-mapped-reads",
|
||||||
|
"proportional"
|
||||||
|
],
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
"multiple_sep" : ";"
|
"multiple_sep" : ";"
|
||||||
@@ -2924,9 +2951,6 @@ meta = [
|
|||||||
"type" : "string",
|
"type" : "string",
|
||||||
"name" : "--sequencing_protocol",
|
"name" : "--sequencing_protocol",
|
||||||
"description" : "Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
|
"description" : "Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
|
||||||
"default" : [
|
|
||||||
"non-strand-specific"
|
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"choices" : [
|
"choices" : [
|
||||||
"non-strand-specific",
|
"non-strand-specific",
|
||||||
@@ -2953,9 +2977,6 @@ meta = [
|
|||||||
"type" : "string",
|
"type" : "string",
|
||||||
"name" : "--java_memory_size",
|
"name" : "--java_memory_size",
|
||||||
"description" : "maximum Java heap memory size, default = 4G.",
|
"description" : "maximum Java heap memory size, default = 4G.",
|
||||||
"default" : [
|
|
||||||
"4G"
|
|
||||||
],
|
|
||||||
"required" : false,
|
"required" : false,
|
||||||
"direction" : "input",
|
"direction" : "input",
|
||||||
"multiple" : false,
|
"multiple" : false,
|
||||||
@@ -2971,7 +2992,7 @@ meta = [
|
|||||||
"is_executable" : true
|
"is_executable" : true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description" : "RNA-seq QC analysis using the qualimap \n",
|
"description" : "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n",
|
||||||
"test_resources" : [
|
"test_resources" : [
|
||||||
{
|
{
|
||||||
"type" : "bash_script",
|
"type" : "bash_script",
|
||||||
@@ -2980,46 +3001,32 @@ meta = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type" : "file",
|
"type" : "file",
|
||||||
"path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam"
|
"path" : "test_data/"
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/unit_test_resources/wgEncodeCaltechRnaSeqGm12878R1x75dAlignsRep2V2.bam.bai"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "file",
|
|
||||||
"path" : "/testData/unit_test_resources/genes.gtf"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"info" : {
|
|
||||||
"migration_info" : {
|
|
||||||
"git_repo" : "https://github.com/nf-core/rnaseq.git",
|
|
||||||
"paths" : [
|
|
||||||
"modules/nf-core/qualimap/rnaseq/main.nf"
|
|
||||||
],
|
|
||||||
"last_sha" : "54721c6946daf6d602d7069dc127deef9cbe6b33"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"status" : "enabled",
|
"status" : "enabled",
|
||||||
"requirements" : {
|
"requirements" : {
|
||||||
"commands" : [
|
"commands" : [
|
||||||
"ps"
|
"ps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"repositories" : [
|
"keywords" : [
|
||||||
{
|
"RNA-seq",
|
||||||
"type" : "vsh",
|
"quality control",
|
||||||
"name" : "biobox",
|
"QC Report"
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
|
"license" : "GPL-2.0",
|
||||||
|
"references" : {
|
||||||
|
"doi" : [
|
||||||
|
"10.1093/bioinformatics/btv566"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://bitbucket.org/kokonech/qualimap/commits/branch/master",
|
||||||
|
"homepage" : "http://qualimap.conesalab.org/",
|
||||||
|
"documentation" : "http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc",
|
||||||
|
"issue_tracker" : "https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open"
|
||||||
|
},
|
||||||
"runners" : [
|
"runners" : [
|
||||||
{
|
{
|
||||||
"type" : "executable",
|
"type" : "executable",
|
||||||
@@ -3098,38 +3105,16 @@ meta = [
|
|||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"id" : "docker",
|
"id" : "docker",
|
||||||
"image" : "ubuntu:22.04",
|
"image" : "quay.io/biocontainers/qualimap:2.3--hdfd78af_0",
|
||||||
"target_registry" : "images.viash-hub.com",
|
"target_registry" : "images.viash-hub.com",
|
||||||
"target_tag" : "main",
|
"target_tag" : "main",
|
||||||
"namespace_separator" : "/",
|
"namespace_separator" : "/",
|
||||||
"setup" : [
|
"setup" : [
|
||||||
{
|
|
||||||
"type" : "apt",
|
|
||||||
"packages" : [
|
|
||||||
"r-base",
|
|
||||||
"unzip",
|
|
||||||
"wget",
|
|
||||||
"openjdk-8-jdk",
|
|
||||||
"libxml2-dev",
|
|
||||||
"libcurl4-openssl-dev"
|
|
||||||
],
|
|
||||||
"interactive" : false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type" : "docker",
|
"type" : "docker",
|
||||||
"run" : [
|
"run" : [
|
||||||
"wget https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.3.zip && \\\\\nunzip qualimap_v2.3.zip && \\\\\ncp -a qualimap_v2.3/. usr/bin && \\\\\nunset DISPLAY && \\\\\nmkdir -p tmp && \\\\\nexport _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp\n"
|
"echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt\n"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "r",
|
|
||||||
"cran" : [
|
|
||||||
"optparse"
|
|
||||||
],
|
|
||||||
"bioc" : [
|
|
||||||
"NOISeqr"
|
|
||||||
],
|
|
||||||
"bioc_force_install" : false
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -3139,49 +3124,39 @@ meta = [
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"build_info" : {
|
"build_info" : {
|
||||||
"config" : "/workdir/root/repo/src/qualimap/config.vsh.yaml",
|
"config" : "/workdir/root/repo/src/qualimap/qualimap_rnaseq/config.vsh.yaml",
|
||||||
"runner" : "nextflow",
|
"runner" : "nextflow",
|
||||||
"engine" : "docker|native",
|
"engine" : "docker|native",
|
||||||
"output" : "/workdir/root/repo/target/nextflow/qualimap",
|
"output" : "target/nextflow/qualimap/qualimap_rnaseq",
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"git_commit" : "ce40a4a6d9e94ca2d63978c9b4c2ea4004b9fcb3",
|
"git_commit" : "a13b57d04a3f3741eedd1af10fd96a9bee126f55",
|
||||||
"git_remote" : "https://x-access-token:ghs_ot0XYuiYvcS5ZVYMUffn1TKOgZgnL00x8gE9@github.com/viash-hub/rnaseq"
|
"git_remote" : "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox",
|
||||||
|
"git_tag" : "v0.2.0-26-ga13b57d"
|
||||||
},
|
},
|
||||||
"package_config" : {
|
"package_config" : {
|
||||||
"name" : "rnaseq",
|
"name" : "biobox",
|
||||||
"version" : "main",
|
"version" : "main",
|
||||||
"info" : {
|
"description" : "A collection of bioinformatics tools for working with sequence data.\n",
|
||||||
"test_resources" : [
|
|
||||||
{
|
|
||||||
"path" : "gs://viash-hub-test-data/rnaseq/v1",
|
|
||||||
"dest" : "testData"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"repositories" : [
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "biobox",
|
|
||||||
"repo" : "vsh/biobox",
|
|
||||||
"tag" : "main"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type" : "vsh",
|
|
||||||
"name" : "craftbox",
|
|
||||||
"repo" : "craftbox",
|
|
||||||
"tag" : "v0.1.0"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"viash_version" : "0.9.0",
|
"viash_version" : "0.9.0",
|
||||||
"source" : "/workdir/root/repo/src",
|
"source" : "src",
|
||||||
"target" : "/workdir/root/repo/target",
|
"target" : "target",
|
||||||
"config_mods" : [
|
"config_mods" : [
|
||||||
".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag := '$id'\n",
|
".requirements.commands := ['ps']\n",
|
||||||
".engines += { type: \\"native\\" }",
|
".engines += { type: \\"native\\" }",
|
||||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||||
".engines[.type == 'docker'].target_tag := 'main'"
|
".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
],
|
],
|
||||||
"organization" : "vsh"
|
"keywords" : [
|
||||||
|
"bioinformatics",
|
||||||
|
"modules",
|
||||||
|
"sequencing"
|
||||||
|
],
|
||||||
|
"license" : "MIT",
|
||||||
|
"organization" : "vsh",
|
||||||
|
"links" : {
|
||||||
|
"repository" : "https://github.com/viash-hub/biobox",
|
||||||
|
"issue_tracker" : "https://github.com/viash-hub/biobox/issues"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'''))
|
}'''))
|
||||||
]
|
]
|
||||||
@@ -3197,13 +3172,13 @@ tempscript=".viash_script.sh"
|
|||||||
cat > "$tempscript" << VIASHMAIN
|
cat > "$tempscript" << VIASHMAIN
|
||||||
## VIASH START
|
## VIASH START
|
||||||
# The following code has been auto-generated by Viash.
|
# The following code has been auto-generated by Viash.
|
||||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
$( if [ ! -z ${VIASH_PAR_BAM+x} ]; then echo "${VIASH_PAR_BAM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bam='&'#" ; else echo "# par_bam="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi )
|
$( if [ ! -z ${VIASH_PAR_GTF+x} ]; then echo "${VIASH_PAR_GTF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_gtf='&'#" ; else echo "# par_gtf="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT_DIR+x} ]; then echo "${VIASH_PAR_OUTPUT_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_dir='&'#" ; else echo "# par_output_dir="; fi )
|
$( if [ ! -z ${VIASH_PAR_QC_RESULTS+x} ]; then echo "${VIASH_PAR_QC_RESULTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_qc_results='&'#" ; else echo "# par_qc_results="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT_PDF+x} ]; then echo "${VIASH_PAR_OUTPUT_PDF}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_pdf='&'#" ; else echo "# par_output_pdf="; fi )
|
$( if [ ! -z ${VIASH_PAR_COUNTS+x} ]; then echo "${VIASH_PAR_COUNTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_counts='&'#" ; else echo "# par_counts="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_OUTPUT_FORMAT+x} ]; then echo "${VIASH_PAR_OUTPUT_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_format='&'#" ; else echo "# par_output_format="; fi )
|
$( if [ ! -z ${VIASH_PAR_REPORT+x} ]; then echo "${VIASH_PAR_REPORT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report='&'#" ; else echo "# par_report="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_PR_BASES+x} ]; then echo "${VIASH_PAR_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_pr_bases='&'#" ; else echo "# par_pr_bases="; fi )
|
$( if [ ! -z ${VIASH_PAR_NUM_PR_BASES+x} ]; then echo "${VIASH_PAR_NUM_PR_BASES}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_pr_bases='&'#" ; else echo "# par_num_pr_bases="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_TR_BIAS+x} ]; then echo "${VIASH_PAR_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_tr_bias='&'#" ; else echo "# par_tr_bias="; fi )
|
$( if [ ! -z ${VIASH_PAR_NUM_TR_BIAS+x} ]; then echo "${VIASH_PAR_NUM_TR_BIAS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_num_tr_bias='&'#" ; else echo "# par_num_tr_bias="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi )
|
$( if [ ! -z ${VIASH_PAR_ALGORITHM+x} ]; then echo "${VIASH_PAR_ALGORITHM}" | sed "s#'#'\\"'\\"'#g;s#.*#par_algorithm='&'#" ; else echo "# par_algorithm="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi )
|
$( if [ ! -z ${VIASH_PAR_SEQUENCING_PROTOCOL+x} ]; then echo "${VIASH_PAR_SEQUENCING_PROTOCOL}" | sed "s#'#'\\"'\\"'#g;s#.*#par_sequencing_protocol='&'#" ; else echo "# par_sequencing_protocol="; fi )
|
||||||
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
$( if [ ! -z ${VIASH_PAR_PAIRED+x} ]; then echo "${VIASH_PAR_PAIRED}" | sed "s#'#'\\"'\\"'#g;s#.*#par_paired='&'#" ; else echo "# par_paired="; fi )
|
||||||
@@ -3233,20 +3208,52 @@ $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}"
|
|||||||
|
|
||||||
set -eo pipefail
|
set -eo pipefail
|
||||||
|
|
||||||
mkdir -p \\$par_output_dir
|
tmp_dir=\\$(mktemp -d -p "\\$meta_temp_dir" qualimap_XXXXXXXXX)
|
||||||
|
|
||||||
|
# Handle output parameters
|
||||||
|
if [ -n "\\$par_report" ]; then
|
||||||
|
outfile=\\$(basename "\\$par_report")
|
||||||
|
report_extension="\\${outfile##*.}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "\\$par_counts" ]; then
|
||||||
|
counts=\\$(basename "\\$par_counts")
|
||||||
|
fi
|
||||||
|
|
||||||
|
# disable flags
|
||||||
|
[[ "\\$par_paired" == "false" ]] && unset par_paired
|
||||||
|
[[ "\\$par_sorted" == "false" ]] && unset par_sorted
|
||||||
|
|
||||||
|
# Run qualimap
|
||||||
qualimap rnaseq \\\\
|
qualimap rnaseq \\\\
|
||||||
--java-mem-size=\\$par_java_memory_size \\\\
|
\\${meta_memory_mb:+--java-mem-size=\\${meta_memory_mb}M} \\\\
|
||||||
--algorithm \\$par_algorithm \\\\
|
\\${par_algorithm:+--algorithm \\$par_algorithm} \\\\
|
||||||
--num-pr-bases \\$par_pr_bases \\\\
|
\\${par_sequencing_protocol:+--sequencing-protocol \\$par_sequencing_protocol} \\\\
|
||||||
--num-tr-bias \\$par_tr_bias \\\\
|
-bam \\$par_bam \\\\
|
||||||
--sequencing-protocol \\$par_sequencing_protocol \\\\
|
|
||||||
-bam \\$par_input \\\\
|
|
||||||
-gtf \\$par_gtf \\\\
|
-gtf \\$par_gtf \\\\
|
||||||
\\${par_paired:+-pe} \\\\
|
-outdir "\\$tmp_dir" \\\\
|
||||||
\\${par_sorted:+-s} \\\\
|
\\${par_num_pr_bases:+--num-pr-bases \\$par_num_pr_bases} \\\\
|
||||||
-outdir \\$par_output_dir \\\\
|
\\${par_num_tr_bias:+--num-tr-bias \\$par_num_tr_bias} \\\\
|
||||||
-outformat \\$par_output_format
|
\\${par_report:+-outformat \\$report_extension} \\\\
|
||||||
|
\\${par_paired:+--paired} \\\\
|
||||||
|
\\${par_sorted:+--sorted} \\\\
|
||||||
|
\\${par_report:+-outfile "\\$outfile"} \\\\
|
||||||
|
\\${par_counts:+-oc "\\$counts"}
|
||||||
|
|
||||||
|
# Move output files
|
||||||
|
mv "\\$tmp_dir/rnaseq_qc_results.txt" "\\$par_qc_results"
|
||||||
|
|
||||||
|
if [ -n "\\$par_report" ] && [ \\$report_extension = "html" ]; then
|
||||||
|
mv "\\$tmp_dir/qualimapReport.html" "\\$par_report"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "\\$par_report" ] && [ \\$report_extension = "pdf" ]; then
|
||||||
|
mv "\\$tmp_dir/\\$outfile" "\\$par_report"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -n "\\$par_counts" ]; then
|
||||||
|
mv "\\$tmp_dir/\\$counts" "\\$par_counts"
|
||||||
|
fi
|
||||||
VIASHMAIN
|
VIASHMAIN
|
||||||
bash "$tempscript"
|
bash "$tempscript"
|
||||||
'''
|
'''
|
||||||
@@ -3607,7 +3614,7 @@ meta["defaults"] = [
|
|||||||
directives: readJsonBlob('''{
|
directives: readJsonBlob('''{
|
||||||
"container" : {
|
"container" : {
|
||||||
"registry" : "images.viash-hub.com",
|
"registry" : "images.viash-hub.com",
|
||||||
"image" : "vsh/rnaseq/qualimap",
|
"image" : "vsh/biobox/qualimap/qualimap_rnaseq",
|
||||||
"tag" : "main"
|
"tag" : "main"
|
||||||
},
|
},
|
||||||
"tag" : "$id"
|
"tag" : "$id"
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
manifest {
|
manifest {
|
||||||
name = 'fastqc'
|
name = 'qualimap/qualimap_rnaseq'
|
||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
nextflowVersion = '!>=20.12.1-edge'
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
version = 'main'
|
version = 'main'
|
||||||
description = 'Fastqc component, please see https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. This component can take one or more files (by means of shell globbing) or a complete directory.\n'
|
description = 'Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5’-3’ bias computation.\n'
|
||||||
|
author = 'Dorien Roosen'
|
||||||
}
|
}
|
||||||
|
|
||||||
process.container = 'nextflow/bash:latest'
|
process.container = 'nextflow/bash:latest'
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "http://json-schema.org/draft-07/schema",
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
"title": "qualimap",
|
"title": "qualimap_rnaseq",
|
||||||
"description": "RNA-seq QC analysis using the qualimap \n",
|
"description": "Qualimap RNA-seq QC reports quality control metrics and bias estimations \nwhich are specific for whole transcriptome sequencing, including reads genomic \norigin, junction analysis, transcript coverage and 5\u2019-3\u2019 bias computation.\n",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"definitions": {
|
"definitions": {
|
||||||
|
|
||||||
@@ -14,11 +14,11 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"input": {
|
"bam": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, required. path to input mapping file in BAM format",
|
"description": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner",
|
||||||
"help_text": "Type: `file`, required. path to input mapping file in BAM format."
|
"help_text": "Type: `file`, required, example: `alignment.bam`. Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner."
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,8 +27,8 @@
|
|||||||
"gtf": {
|
"gtf": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, required. path to annotations file in Ensembl GTF format",
|
"description": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format",
|
||||||
"help_text": "Type: `file`, required. path to annotations file in Ensembl GTF format."
|
"help_text": "Type: `file`, required, example: `annotations.gtf`. Path to genomic annotations in Ensembl GTF format."
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,35 +44,35 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"output_dir": {
|
"qc_results": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report",
|
"description": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.output_dir.qualimap_output`. path to output directory for raw data and report."
|
"help_text": "Type: `file`, required, default: `$id.$key.qc_results.txt`, example: `rnaseq_qc_results.txt`. Text file containing the RNAseq QC results."
|
||||||
,
|
,
|
||||||
"default":"$id.$key.output_dir.qualimap_output"
|
"default": "$id.$key.qc_results.txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"output_pdf": {
|
"counts": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report",
|
"description": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts",
|
||||||
"help_text": "Type: `file`, default: `$id.$key.output_pdf.pdf`. path to output file for pdf report."
|
"help_text": "Type: `file`, default: `$id.$key.counts.counts`. Output file for computed counts."
|
||||||
,
|
,
|
||||||
"default":"$id.$key.output_pdf.pdf"
|
"default": "$id.$key.counts.counts"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"output_format": {
|
"report": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)",
|
"description": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file",
|
||||||
"help_text": "Type: `string`, default: `html`. Format of the output report (PDF or HTML, default is HTML)"
|
"help_text": "Type: `file`, default: `$id.$key.report.html`, example: `report.html`. Report output file. Supported formats are PDF or HTML."
|
||||||
,
|
,
|
||||||
"default":"html"
|
"default": "$id.$key.report.html"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -87,24 +87,22 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
"pr_bases": {
|
"num_pr_bases": {
|
||||||
"type":
|
"type":
|
||||||
"integer",
|
"integer",
|
||||||
"description": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)",
|
"description": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)",
|
||||||
"help_text": "Type: `integer`, default: `100`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)."
|
"help_text": "Type: `integer`. Number of upstream/downstream nucleotide bases to compute 5\u0027-3\u0027 bias (default = 100)."
|
||||||
,
|
|
||||||
"default":100
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
,
|
,
|
||||||
"tr_bias": {
|
"num_tr_bias": {
|
||||||
"type":
|
"type":
|
||||||
"integer",
|
"integer",
|
||||||
"description": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)",
|
"description": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)",
|
||||||
"help_text": "Type: `integer`, default: `1000`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)."
|
"help_text": "Type: `integer`. Number of top highly expressed transcripts to compute 5\u0027-3\u0027 bias (default = 1000)."
|
||||||
,
|
|
||||||
"default":1000
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -112,10 +110,11 @@
|
|||||||
"algorithm": {
|
"algorithm": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)",
|
"description": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional)",
|
||||||
"help_text": "Type: `string`, default: `uniquely-mapped-reads`. Counting algorithm (uniquely-mapped-reads (default) or proportional)."
|
"help_text": "Type: `string`, choices: ``uniquely-mapped-reads`, `proportional``. Counting algorithm (uniquely-mapped-reads (default) or proportional).",
|
||||||
,
|
"enum": ["uniquely-mapped-reads", "proportional"]
|
||||||
"default":"uniquely-mapped-reads"
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -123,12 +122,11 @@
|
|||||||
"sequencing_protocol": {
|
"sequencing_protocol": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))",
|
"description": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default))",
|
||||||
"help_text": "Type: `string`, default: `non-strand-specific`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
|
"help_text": "Type: `string`, choices: ``non-strand-specific`, `strand-specific-reverse`, `strand-specific-forward``. Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)).",
|
||||||
"enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"]
|
"enum": ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"]
|
||||||
|
|
||||||
,
|
|
||||||
"default":"non-strand-specific"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -139,7 +137,7 @@
|
|||||||
"description": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads",
|
"description": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads",
|
||||||
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads."
|
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag for paired-end experiments will result in counting fragments instead of reads."
|
||||||
,
|
,
|
||||||
"default":false
|
"default": "False"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -150,7 +148,7 @@
|
|||||||
"description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name",
|
"description": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name",
|
||||||
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis."
|
"help_text": "Type: `boolean_true`, default: `false`. Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis."
|
||||||
,
|
,
|
||||||
"default":false
|
"default": "False"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -158,10 +156,9 @@
|
|||||||
"java_memory_size": {
|
"java_memory_size": {
|
||||||
"type":
|
"type":
|
||||||
"string",
|
"string",
|
||||||
"description": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G",
|
"description": "Type: `string`. maximum Java heap memory size, default = 4G",
|
||||||
"help_text": "Type: `string`, default: `4G`. maximum Java heap memory size, default = 4G."
|
"help_text": "Type: `string`. maximum Java heap memory size, default = 4G."
|
||||||
,
|
|
||||||
"default":"4G"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,879 @@
|
|||||||
|
name: "rsem_calculate_expression"
|
||||||
|
namespace: "rsem"
|
||||||
|
version: "main"
|
||||||
|
argument_groups:
|
||||||
|
- name: "Input"
|
||||||
|
arguments:
|
||||||
|
- type: "string"
|
||||||
|
name: "--id"
|
||||||
|
description: "Sample ID."
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--strandedness"
|
||||||
|
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "forward"
|
||||||
|
- "reverse"
|
||||||
|
- "unstranded"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--paired"
|
||||||
|
description: "Paired-end reads or not?"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "file"
|
||||||
|
name: "--input"
|
||||||
|
description: "Input reads for quantification."
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: true
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--index"
|
||||||
|
description: "RSEM index."
|
||||||
|
info: null
|
||||||
|
must_exist: false
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--extra_args"
|
||||||
|
description: "Extra rsem-calculate-expression arguments in addition to the examples."
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- name: "Output"
|
||||||
|
arguments:
|
||||||
|
- type: "file"
|
||||||
|
name: "--counts_gene"
|
||||||
|
description: "Expression counts on gene level"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.genes.results"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--counts_transcripts"
|
||||||
|
description: "Expression counts on transcript level"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.isoforms.results"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--stat"
|
||||||
|
description: "RSEM statistics"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.stat"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--logs"
|
||||||
|
description: "RSEM logs"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.log"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--bam_star"
|
||||||
|
description: "BAM file generated by STAR (optional)"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.STAR.genome.bam"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--bam_genome"
|
||||||
|
description: "Genome BAM file (optional)"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.genome.bam"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--bam_transcript"
|
||||||
|
description: "Transcript BAM file (optional)"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "$id.transcript.bam"
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "output"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--sort_bam_by_read_name"
|
||||||
|
description: "Sort BAM file aligned under transcript coordinate by read name.\
|
||||||
|
\ Setting this option on will produce \ndeterministic maximum likelihood estimations\
|
||||||
|
\ from independent runs. Note that sorting will take long \ntime and lots of\
|
||||||
|
\ memory.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--no_bam_output"
|
||||||
|
description: "Do not output any BAM file."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--sampling_for_bam"
|
||||||
|
description: "When RSEM generates a BAM file, instead of outputting all alignments\
|
||||||
|
\ a read has with their posterior \nprobabilities, one alignment is sampled\
|
||||||
|
\ according to the posterior probabilities. The sampling procedure \nincludes\
|
||||||
|
\ the alignment to the \"noise\" transcript, which does not appear in the BAM\
|
||||||
|
\ file. Only the \nsampled alignment has a weight of 1. All other alignments\
|
||||||
|
\ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\
|
||||||
|
\ in the BAM file should have weight 0.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--output_genome_bam"
|
||||||
|
description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\
|
||||||
|
\ to genomic coordinates and \nannotated with their posterior probabilities.\
|
||||||
|
\ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\
|
||||||
|
\ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\
|
||||||
|
\ \nwill be generated.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--sort_bam_by_coordinate"
|
||||||
|
description: "Sort RSEM generated transcript and genome BAM files by coordinates\
|
||||||
|
\ and build associated indices.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- name: "Basic Options"
|
||||||
|
arguments:
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--no_qualities"
|
||||||
|
description: "Input reads do not contain quality scores."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--alignments"
|
||||||
|
description: "Input file contains alignments in SAM/BAM/CRAM format. The exact\
|
||||||
|
\ file format will be determined \nautomatically.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "file"
|
||||||
|
name: "--fai"
|
||||||
|
description: "If the header section of input alignment file does not contain reference\
|
||||||
|
\ sequence information, \nthis option should be turned on. <file> is a FAI format\
|
||||||
|
\ file containing each reference sequence's \nname and length. Please refer\
|
||||||
|
\ to the SAM official website for the details of FAI format.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--bowtie2"
|
||||||
|
description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\
|
||||||
|
\ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\
|
||||||
|
\ are set in a way to avoid those alignments. In \nparticular, we use options\
|
||||||
|
\ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\
|
||||||
|
\ \nby default. The last parameter of '--score_min', '-0.1', is the negative\
|
||||||
|
\ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\
|
||||||
|
\ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--star"
|
||||||
|
description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\
|
||||||
|
\ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\
|
||||||
|
\ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\
|
||||||
|
\ with name as 'sample_name.bam'. Each STAR job will have its own private copy\
|
||||||
|
\ of \nthe genome in memory.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--hisat2_hca"
|
||||||
|
description: "Use HISAT2 to align reads to the transcriptome according to Human\
|
||||||
|
\ Cell Atlas.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--append_names"
|
||||||
|
description: "If gene_name/transcript_name is available, append it to the end\
|
||||||
|
\ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\
|
||||||
|
\ and 'sample_name.genes.results'.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--seed"
|
||||||
|
description: "Set the seed for the random number generators used in calculating\
|
||||||
|
\ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\
|
||||||
|
\ 32 bit integer.\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--single_cell_prior"
|
||||||
|
description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\
|
||||||
|
\ mean estimates and credibility \nintervals. However, much less genes are expressed\
|
||||||
|
\ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\
|
||||||
|
\ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\
|
||||||
|
\ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\
|
||||||
|
\ as the prior which \nencourage the sparsity of the expression levels.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--calc_pme"
|
||||||
|
description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--calc_ci"
|
||||||
|
description: "Calculate 95% credibility intervals and posterior mean estimates.\
|
||||||
|
\ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--quiet"
|
||||||
|
alternatives:
|
||||||
|
- "-q"
|
||||||
|
description: "Suppress the output of logging information."
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- name: "Aligner Options"
|
||||||
|
arguments:
|
||||||
|
- type: "integer"
|
||||||
|
name: "--seed_length"
|
||||||
|
description: "Seed length used by the read aligner. Providing the correct value\
|
||||||
|
\ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\
|
||||||
|
\ seed length parameter. Any read with its or at least \none of its mates' (for\
|
||||||
|
\ paired-end reads) length less than this value will be ignored. If the \nreferences\
|
||||||
|
\ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\
|
||||||
|
\ minimum \nallowed value is 25. Note that this script will only check if the\
|
||||||
|
\ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\
|
||||||
|
\ 25)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 25
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--phred64_quals"
|
||||||
|
description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\
|
||||||
|
\ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\
|
||||||
|
\ quality score will be encoded as Phred+33. (Default: false)\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--solexa_quals"
|
||||||
|
description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\
|
||||||
|
\ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\
|
||||||
|
\ score will be encoded as Phred+33. (Default: false)\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bowtie_n"
|
||||||
|
description: "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,\
|
||||||
|
\ Default: 2)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 2
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- 0
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 3
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bowtie_e"
|
||||||
|
description: "(Bowtie parameter) max sum of mismatch quality scores across the\
|
||||||
|
\ alignment. (Default: 99999999)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 99999999
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bowtie_m"
|
||||||
|
description: "(Bowtie parameter) suppress all alignments for a read if > <int>\
|
||||||
|
\ valid alignments exist. (Default: 200)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 200
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bowtie_chunkmbs"
|
||||||
|
description: "(Bowtie parameter) memory allocated for best first alignment calculation\
|
||||||
|
\ (Default: 0 - use Bowtie's default)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "double"
|
||||||
|
name: "--bowtie2_mismatch_rate"
|
||||||
|
description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\
|
||||||
|
\ 0.1)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0.1
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--bowtie2_k"
|
||||||
|
description: "(Bowtie 2 parameter) Find up to <int> alignments per read. (Default:\
|
||||||
|
\ 200)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 200
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--bowtie2_sensitivity_level"
|
||||||
|
description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\
|
||||||
|
\ mode. This option controls how \nhard Bowtie 2 tries to find alignments. <string>\
|
||||||
|
\ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\
|
||||||
|
. The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\
|
||||||
|
\ \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\"\
|
||||||
|
\ - use Bowtie 2's default)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "sensitive"
|
||||||
|
required: false
|
||||||
|
choices:
|
||||||
|
- "very_fast"
|
||||||
|
- "fast"
|
||||||
|
- "sensitive"
|
||||||
|
- "very_sensitive"
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--star_gzipped_read_file"
|
||||||
|
description: "Input read file(s) is compressed by gzip. (Default: false)\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--star_bzipped_read_file"
|
||||||
|
description: "Input read file(s) is compressed by bzip2. (Default: false)\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--star_output_genome_bam"
|
||||||
|
description: "Save the BAM file from STAR alignment under genomic coordinate to\
|
||||||
|
\ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\
|
||||||
|
\ In this file, according to STAR's manual, 'paired \nends of an alignment are\
|
||||||
|
\ always adjacent, and multiple alignments of a read are adjacent as well'.\
|
||||||
|
\ \n(Default: false)\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- name: "Advanced Options"
|
||||||
|
arguments:
|
||||||
|
- type: "string"
|
||||||
|
name: "--tag"
|
||||||
|
description: "The name of the optional field used in the SAM input for identifying\
|
||||||
|
\ a read with too many valid \nalignments. The field should have the format\
|
||||||
|
\ <tagName>:i:<value>, where a <value> bigger than 0 \nindicates a read with\
|
||||||
|
\ too many alignments. (Default: \"\")\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- ""
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--fragment_length_min"
|
||||||
|
description: "Minimum read/insert length allowed. This is also the value for the\
|
||||||
|
\ Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 1
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--fragment_length_max"
|
||||||
|
description: "Maximum read/insert length allowed. This is also the value for the\
|
||||||
|
\ Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 1000
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--fragment_length_mean"
|
||||||
|
description: "(single-end data only) The mean of the fragment length distribution,\
|
||||||
|
\ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\
|
||||||
|
\ fragment length distribution)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- -1
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "double"
|
||||||
|
name: "--gragment_length_sd"
|
||||||
|
description: "(single-end data only) The standard deviation of the fragment length\
|
||||||
|
\ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\
|
||||||
|
\ that all fragments are of the same length, \ngiven by the rounded value of\
|
||||||
|
\ --fragment_length_mean).\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0.0
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--estimate_rspd"
|
||||||
|
description: "Set this option if you want to estimate the read start position\
|
||||||
|
\ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--num_rspd_bins"
|
||||||
|
description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\
|
||||||
|
\ is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 20
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--gibbs_burnin"
|
||||||
|
description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\
|
||||||
|
\ passes over the entire data set \nonce. If RSEM can use multiple threads,\
|
||||||
|
\ multiple Gibbs samplers will start at the same time and all \nsamplers share\
|
||||||
|
\ the same burn-in number. (Default: 200)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 200
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--gibbs_number_of_samples"
|
||||||
|
description: "The total number of count vectors RSEM will collect from its Gibbs\
|
||||||
|
\ samplers. (Default: 1000)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 1000
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--gibbs_sampling_gap"
|
||||||
|
description: "The number of rounds between two succinct count vectors RSEM collects.\
|
||||||
|
\ If the count vector after \nround N is collected, the count vector after round\
|
||||||
|
\ N + <int> will also be collected. (Default: 1)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 1
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "double"
|
||||||
|
name: "--ci_credibility_level"
|
||||||
|
description: "The credibility level for credibility intervals. (Default: 0.95)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 0.95
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--ci_number_of_samples_per_count_vector"
|
||||||
|
description: "The number of read generating probability vectors sampled per sampled\
|
||||||
|
\ count vector. The credibility \nintervals are calculated by first sampling P(C\
|
||||||
|
\ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\
|
||||||
|
\ option controls how many Theta vectors are sampled per sampled count vector.\
|
||||||
|
\ \n(Default: 50)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- 50
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--keep_intermediate_files"
|
||||||
|
description: "Keep temporary files generated by RSEM. RSEM creates a temporary\
|
||||||
|
\ directory, 'sample_name.temp', \ninto which it puts all intermediate output\
|
||||||
|
\ files. If this directory already exists, RSEM overwrites \nall files generated\
|
||||||
|
\ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\
|
||||||
|
temporary directory is deleted. Set this option to prevent the deletion of this\
|
||||||
|
\ directory and the \nintermediate files inside of it.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "string"
|
||||||
|
name: "--temporary_folder"
|
||||||
|
description: "Set where to put the temporary files generated by RSEM. If the folder\
|
||||||
|
\ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "sample_name.temp"
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--time"
|
||||||
|
description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- name: "Prior-Enhanced RSEM Options"
|
||||||
|
arguments:
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--run_pRSEM"
|
||||||
|
description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\
|
||||||
|
\ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\
|
||||||
|
\ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\
|
||||||
|
\ peak information to partition isoforms (e.g. in pRSEM's default \npartition\
|
||||||
|
\ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\
|
||||||
|
\ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\
|
||||||
|
\ are required (with the \n'--chipseq_target_read_files <string>', '--chipseq_control_read_files\
|
||||||
|
\ <string>', and '--bowtie_path \n<path>' options), otherwise, ChIP-seq FASTQ\
|
||||||
|
\ files for target and control and the path to Bowtie \nexecutables are required.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "file"
|
||||||
|
name: "--chipseq_peak_file"
|
||||||
|
description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\
|
||||||
|
\ format. This file is used \nwhen running prior-enhanced RSEM in the default\
|
||||||
|
\ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\
|
||||||
|
\ overlapping with their transcription start site region or not. Each \npartition\
|
||||||
|
\ will have its own prior parameter learned from a training set. This file can\
|
||||||
|
\ be either \ngzipped or ungzipped.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--chipseq_target_read_files"
|
||||||
|
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\
|
||||||
|
\ This option is used when running \nprior-enhanced RSEM. It provides information\
|
||||||
|
\ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\
|
||||||
|
\ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>'\
|
||||||
|
\ \nand '--chipseq_control_read_files <string>' must be defined when this option\
|
||||||
|
\ is specified.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--chipseq_control_read_files"
|
||||||
|
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq control.\
|
||||||
|
\ This option is used when running \nprior-enhanced RSEM. It provides information\
|
||||||
|
\ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\
|
||||||
|
\ a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>' and \n'--chipseq_target_read_files\
|
||||||
|
\ <string>' must be defined when this option is specified.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--chipseq_read_files_multi_targets"
|
||||||
|
description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\
|
||||||
|
\ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\
|
||||||
|
\ is learned from multiple complementary data sets. It provides \ninformation\
|
||||||
|
\ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\
|
||||||
|
\ with a suffix \n'.gz' or '.gzip'. When this option is specified, the option\
|
||||||
|
\ '--bowtie_path <path>' must be defined and \nthe option '--partition_model\
|
||||||
|
\ <string>' will be set to 'cmb_lgt' automatically.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "file"
|
||||||
|
name: "--chipseq_bed_files_multi_targets"
|
||||||
|
description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\
|
||||||
|
\ This option is used when running \nprior-enhanced RSEM, where prior is learned\
|
||||||
|
\ from multiple complementary data sets. It provides information \nof ChIP-seq\
|
||||||
|
\ signals and must have at least the first six BED columns. All files can be\
|
||||||
|
\ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\
|
||||||
|
\ is specified, the option '--partition_model \n<string>' will be set to 'cmb_lgt'\
|
||||||
|
\ automatically.\n"
|
||||||
|
info: null
|
||||||
|
must_exist: true
|
||||||
|
create_parent: true
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "boolean_true"
|
||||||
|
name: "--cap_stacked_chipseq_reads"
|
||||||
|
description: "Keep a maximum number of ChIP-seq reads that aligned to the same\
|
||||||
|
\ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\
|
||||||
|
\ where prior is learned from multiple complementary data sets. This \noption\
|
||||||
|
\ is only in use when either '--chipseq_read_files_multi_targets <string>' or\
|
||||||
|
\ \n'--chipseq_bed_files_multi_targets <string>' is specified.\n"
|
||||||
|
info: null
|
||||||
|
direction: "input"
|
||||||
|
- type: "integer"
|
||||||
|
name: "--n_max_stacked_chipseq_reads"
|
||||||
|
description: "The maximum number of stacked ChIP-seq reads to keep. This option\
|
||||||
|
\ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\
|
||||||
|
\ complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads'\
|
||||||
|
\ is set.\n"
|
||||||
|
info: null
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
- type: "string"
|
||||||
|
name: "--partition_model"
|
||||||
|
description: "A keyword to specify the partition model used by prior-enhanced\
|
||||||
|
\ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\
|
||||||
|
\ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\
|
||||||
|
\ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\
|
||||||
|
\ models are learned from a training set. For detailed explanations, please\
|
||||||
|
\ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n"
|
||||||
|
info: null
|
||||||
|
example:
|
||||||
|
- "pk"
|
||||||
|
required: false
|
||||||
|
direction: "input"
|
||||||
|
multiple: false
|
||||||
|
multiple_sep: ";"
|
||||||
|
resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "script.sh"
|
||||||
|
is_executable: true
|
||||||
|
description: "Calculate expression with RSEM. \n"
|
||||||
|
test_resources:
|
||||||
|
- type: "bash_script"
|
||||||
|
path: "test.sh"
|
||||||
|
is_executable: true
|
||||||
|
info: null
|
||||||
|
status: "enabled"
|
||||||
|
requirements:
|
||||||
|
commands:
|
||||||
|
- "ps"
|
||||||
|
keywords:
|
||||||
|
- "Transcriptome"
|
||||||
|
- "Index"
|
||||||
|
- "Alignment"
|
||||||
|
- "RSEM"
|
||||||
|
license: "GPL-3.0"
|
||||||
|
references:
|
||||||
|
doi:
|
||||||
|
- "https://doi.org/10.1186/1471-2105-12-323"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/deweylab/RSEM"
|
||||||
|
homepage: "https://deweylab.github.io/RSEM/"
|
||||||
|
documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html"
|
||||||
|
runners:
|
||||||
|
- type: "executable"
|
||||||
|
id: "executable"
|
||||||
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||||
|
- type: "nextflow"
|
||||||
|
id: "nextflow"
|
||||||
|
directives:
|
||||||
|
tag: "$id"
|
||||||
|
auto:
|
||||||
|
simplifyInput: true
|
||||||
|
simplifyOutput: false
|
||||||
|
transcript: false
|
||||||
|
publish: false
|
||||||
|
config:
|
||||||
|
labels:
|
||||||
|
mem1gb: "memory = 1000000000.B"
|
||||||
|
mem2gb: "memory = 2000000000.B"
|
||||||
|
mem5gb: "memory = 5000000000.B"
|
||||||
|
mem10gb: "memory = 10000000000.B"
|
||||||
|
mem20gb: "memory = 20000000000.B"
|
||||||
|
mem50gb: "memory = 50000000000.B"
|
||||||
|
mem100gb: "memory = 100000000000.B"
|
||||||
|
mem200gb: "memory = 200000000000.B"
|
||||||
|
mem500gb: "memory = 500000000000.B"
|
||||||
|
mem1tb: "memory = 1000000000000.B"
|
||||||
|
mem2tb: "memory = 2000000000000.B"
|
||||||
|
mem5tb: "memory = 5000000000000.B"
|
||||||
|
mem10tb: "memory = 10000000000000.B"
|
||||||
|
mem20tb: "memory = 20000000000000.B"
|
||||||
|
mem50tb: "memory = 50000000000000.B"
|
||||||
|
mem100tb: "memory = 100000000000000.B"
|
||||||
|
mem200tb: "memory = 200000000000000.B"
|
||||||
|
mem500tb: "memory = 500000000000000.B"
|
||||||
|
mem1gib: "memory = 1073741824.B"
|
||||||
|
mem2gib: "memory = 2147483648.B"
|
||||||
|
mem4gib: "memory = 4294967296.B"
|
||||||
|
mem8gib: "memory = 8589934592.B"
|
||||||
|
mem16gib: "memory = 17179869184.B"
|
||||||
|
mem32gib: "memory = 34359738368.B"
|
||||||
|
mem64gib: "memory = 68719476736.B"
|
||||||
|
mem128gib: "memory = 137438953472.B"
|
||||||
|
mem256gib: "memory = 274877906944.B"
|
||||||
|
mem512gib: "memory = 549755813888.B"
|
||||||
|
mem1tib: "memory = 1099511627776.B"
|
||||||
|
mem2tib: "memory = 2199023255552.B"
|
||||||
|
mem4tib: "memory = 4398046511104.B"
|
||||||
|
mem8tib: "memory = 8796093022208.B"
|
||||||
|
mem16tib: "memory = 17592186044416.B"
|
||||||
|
mem32tib: "memory = 35184372088832.B"
|
||||||
|
mem64tib: "memory = 70368744177664.B"
|
||||||
|
mem128tib: "memory = 140737488355328.B"
|
||||||
|
mem256tib: "memory = 281474976710656.B"
|
||||||
|
mem512tib: "memory = 562949953421312.B"
|
||||||
|
cpu1: "cpus = 1"
|
||||||
|
cpu2: "cpus = 2"
|
||||||
|
cpu5: "cpus = 5"
|
||||||
|
cpu10: "cpus = 10"
|
||||||
|
cpu20: "cpus = 20"
|
||||||
|
cpu50: "cpus = 50"
|
||||||
|
cpu100: "cpus = 100"
|
||||||
|
cpu200: "cpus = 200"
|
||||||
|
cpu500: "cpus = 500"
|
||||||
|
cpu1000: "cpus = 1000"
|
||||||
|
debug: false
|
||||||
|
container: "docker"
|
||||||
|
engines:
|
||||||
|
- type: "docker"
|
||||||
|
id: "docker"
|
||||||
|
image: "ubuntu:22.04"
|
||||||
|
target_registry: "images.viash-hub.com"
|
||||||
|
target_tag: "main"
|
||||||
|
namespace_separator: "/"
|
||||||
|
setup:
|
||||||
|
- type: "apt"
|
||||||
|
packages:
|
||||||
|
- "build-essential"
|
||||||
|
- "gcc"
|
||||||
|
- "g++"
|
||||||
|
- "make"
|
||||||
|
- "wget"
|
||||||
|
- "zlib1g-dev"
|
||||||
|
- "unzip"
|
||||||
|
interactive: false
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\
|
||||||
|
\ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\
|
||||||
|
\ /usr/local/bin && \\\ncd && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\
|
||||||
|
\ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n"
|
||||||
|
env:
|
||||||
|
- "STAR_VERSION=2.7.11b"
|
||||||
|
- "RSEM_VERSION=1.3.3"
|
||||||
|
- type: "docker"
|
||||||
|
run:
|
||||||
|
- "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\
|
||||||
|
\ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\
|
||||||
|
\ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\
|
||||||
|
\ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\
|
||||||
|
\ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\
|
||||||
|
d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\
|
||||||
|
\ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n"
|
||||||
|
entrypoint: []
|
||||||
|
cmd: null
|
||||||
|
- type: "native"
|
||||||
|
id: "native"
|
||||||
|
build_info:
|
||||||
|
config: "src/rsem/rsem_calculate_expression/config.vsh.yaml"
|
||||||
|
runner: "nextflow"
|
||||||
|
engine: "docker|native"
|
||||||
|
output: "target/nextflow/rsem/rsem_calculate_expression"
|
||||||
|
executable: "target/nextflow/rsem/rsem_calculate_expression/main.nf"
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
git_commit: "a13b57d04a3f3741eedd1af10fd96a9bee126f55"
|
||||||
|
git_remote: "https://x-access-token:ghs_xpDMoQpz4lF1RaGsMH4IlMbO48cLeW1cIYSF@github.com/viash-hub/biobox"
|
||||||
|
git_tag: "v0.2.0-26-ga13b57d"
|
||||||
|
package_config:
|
||||||
|
name: "biobox"
|
||||||
|
version: "main"
|
||||||
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
||||||
|
info: null
|
||||||
|
viash_version: "0.9.0"
|
||||||
|
source: "src"
|
||||||
|
target: "target"
|
||||||
|
config_mods:
|
||||||
|
- ".requirements.commands := ['ps']\n"
|
||||||
|
- ".engines += { type: \"native\" }"
|
||||||
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||||
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||||
|
keywords:
|
||||||
|
- "bioinformatics"
|
||||||
|
- "modules"
|
||||||
|
- "sequencing"
|
||||||
|
license: "MIT"
|
||||||
|
organization: "vsh"
|
||||||
|
links:
|
||||||
|
repository: "https://github.com/viash-hub/biobox"
|
||||||
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ manifest {
|
|||||||
mainScript = 'main.nf'
|
mainScript = 'main.nf'
|
||||||
nextflowVersion = '!>=20.12.1-edge'
|
nextflowVersion = '!>=20.12.1-edge'
|
||||||
version = 'main'
|
version = 'main'
|
||||||
description = 'Calculate expression with RSEM.\n'
|
description = 'Calculate expression with RSEM. \n'
|
||||||
}
|
}
|
||||||
|
|
||||||
process.container = 'nextflow/bash:latest'
|
process.container = 'nextflow/bash:latest'
|
||||||
@@ -0,0 +1,839 @@
|
|||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema",
|
||||||
|
"title": "rsem_calculate_expression",
|
||||||
|
"description": "Calculate expression with RSEM. \n",
|
||||||
|
"type": "object",
|
||||||
|
"definitions": {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"input" : {
|
||||||
|
"title": "Input",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"id": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`. Sample ID",
|
||||||
|
"help_text": "Type: `string`. Sample ID."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"strandedness": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity",
|
||||||
|
"help_text": "Type: `string`, choices: ``forward`, `reverse`, `unstranded``. Sample strand-specificity. Must be one of unstranded, forward, reverse",
|
||||||
|
"enum": ["forward", "reverse", "unstranded"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"paired": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Paired-end reads or not?",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Paired-end reads or not?"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"input": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification",
|
||||||
|
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Input reads for quantification."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"index": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. RSEM index",
|
||||||
|
"help_text": "Type: `file`. RSEM index."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"extra_args": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples",
|
||||||
|
"help_text": "Type: `string`. Extra rsem-calculate-expression arguments in addition to the examples."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"output" : {
|
||||||
|
"title": "Output",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"counts_gene": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.counts_gene.results`, example: `$id.genes.results`. Expression counts on gene level"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.counts_gene.results"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"counts_transcripts": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.counts_transcripts.results`, example: `$id.isoforms.results`. Expression counts on transcript level"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.counts_transcripts.results"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"stat": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.stat.stat`, example: `$id.stat`. RSEM statistics"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.stat.stat"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"logs": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.logs.log`, example: `$id.log`. RSEM logs"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.logs.log"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bam_star": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.bam_star.bam`, example: `$id.STAR.genome.bam`. BAM file generated by STAR (optional)"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.bam_star.bam"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bam_genome": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.bam_genome.bam`, example: `$id.genome.bam`. Genome BAM file (optional)"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.bam_genome.bam"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bam_transcript": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)",
|
||||||
|
"help_text": "Type: `file`, default: `$id.$key.bam_transcript.bam`, example: `$id.transcript.bam`. Transcript BAM file (optional)"
|
||||||
|
,
|
||||||
|
"default": "$id.$key.bam_transcript.bam"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"sort_bam_by_read_name": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordinate by read name",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Sort BAM file aligned under transcript coordinate by read name. Setting this option on will produce \ndeterministic maximum likelihood estimations from independent runs. Note that sorting will take long \ntime and lots of memory.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"no_bam_output": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Do not output any BAM file",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Do not output any BAM file."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"sampling_for_bam": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior \nprobabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure \nincludes the alignment to the \"noise\" transcript, which does not appear in the BAM file. Only the \nsampled alignment has a weight of 1. All other alignments have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared in the BAM file should have weight 0.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"output_genome_bam": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Generate a BAM file, \u0027sample_name.genome.bam\u0027, with alignments mapped to genomic coordinates and \nannotated with their posterior probabilities. In addition, RSEM will call samtools (included in RSEM \npackage) to sort and index the bam file. \u0027sample_name.genome.sorted.bam\u0027 and \u0027sample_name.genome.sorted.bam.bai\u0027 \nwill be generated.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"sort_bam_by_coordinate": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Sort RSEM generated transcript and genome BAM files by coordinates and build associated indices.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"basic options" : {
|
||||||
|
"title": "Basic Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"no_qualities": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input reads do not contain quality scores."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"alignments": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input file contains alignments in SAM/BAM/CRAM format. The exact file format will be determined \nautomatically.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fai": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on",
|
||||||
|
"help_text": "Type: `file`. If the header section of input alignment file does not contain reference sequence information, \nthis option should be turned on. \u003cfile\u003e is a FAI format file containing each reference sequence\u0027s \nname and length. Please refer to the SAM official website for the details of FAI format.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie2": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM does not handle indel, local \nand discordant alignments, the Bowtie2 parameters are set in a way to avoid those alignments. In \nparticular, we use options \u0027--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1\u0027 \nby default. The last parameter of \u0027--score_min\u0027, \u0027-0.1\u0027, is the negative of maximum mismatch rate. \nThis rate can be set by option \u0027--bowtie2_mismatch_rate\u0027. If reads are paired-end, we additionally \nuse options \u0027--no_mixed\u0027 and \u0027--no_discordant\u0027.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"star": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Use STAR to align reads",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Use STAR to align reads. Alignment parameters are from ENCODE3\u0027s STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR\u0027s Output BAM file is unsorted. It is stored in RSEM\u0027s \ntemporary directory with name as \u0027sample_name.bam\u0027. Each STAR job will have its own private copy of \nthe genome in memory.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"hisat2_hca": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlas",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Use HISAT2 to align reads to the transcriptome according to Human Cell Atlas.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"append_names": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. If gene_name/transcript_name is available, append it to the end of gene_id/transcript_id (separated \nby \u0027_\u0027) in files \u0027sample_name.isoforms.results\u0027 and \u0027sample_name.genes.results\u0027.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"seed": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals",
|
||||||
|
"help_text": "Type: `integer`. Set the seed for the random number generators used in calculating posterior mean estimates and \ncredibility intervals. The seed must be a non-negative 32 bit integer.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"single_cell_prior": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. By default, RSEM uses Dirichlet(1) as the prior to calculate posterior mean estimates and credibility \nintervals. However, far fewer genes are expressed in single-cell RNA-Seq data. Thus, if you want to \ncompute posterior mean estimates and/or credibility intervals and you have single-cell RNA-Seq data, \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1) as the prior which \nencourages sparsity of the expression levels.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"calc_pme": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Run RSEM\u0027s collapsed Gibbs sampler to calculate posterior mean estimates."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"calc_ci": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Calculate 95% credibility intervals and posterior mean estimates. The credibility level can be \nchanged by setting \u0027--ci_credibility_level\u0027.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"quiet": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Suppress the output of logging information",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Suppress the output of logging information."
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"aligner options" : {
|
||||||
|
"title": "Aligner Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"seed_length": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `25`. Seed length used by the read aligner",
|
||||||
|
"help_text": "Type: `integer`, example: `25`. Seed length used by the read aligner. Providing the correct value is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie\u0027s seed length parameter. Any read with its or at least \none of its mates\u0027 (for paired-end reads) length less than this value will be ignored. If the \nreferences are not added poly(A) tails, the minimum allowed value is 5, otherwise, the minimum \nallowed value is 25. Note that this script will only check if the value \u003e= 5 and give a warning \nmessage if the value \u003c 25 but \u003e= 5. (Default: 25)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"phred64_quals": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input quality scores are encoded as Phred+64 (default for GA Pipeline ver. \u003e= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"solexa_quals": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input quality scores are solexa encoded (from GA Pipeline ver. \u003c 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality score will be encoded as Phred+33. (Default: false)\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie_n": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed",
|
||||||
|
"help_text": "Type: `integer`, example: `2`, choices: ``0`, `1`, `2`, `3``. (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3, Default: 2)\n",
|
||||||
|
"enum": [0, 1, 2, 3]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie_e": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment",
|
||||||
|
"help_text": "Type: `integer`, example: `99999999`. (Bowtie parameter) max sum of mismatch quality scores across the alignment. (Default: 99999999)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie_m": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist",
|
||||||
|
"help_text": "Type: `integer`, example: `200`. (Bowtie parameter) suppress all alignments for a read if \u003e \u003cint\u003e valid alignments exist. (Default: 200)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie_chunkmbs": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n",
|
||||||
|
"help_text": "Type: `integer`, example: `0`. (Bowtie parameter) memory allocated for best first alignment calculation (Default: 0 - use Bowtie\u0027s default)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie2_mismatch_rate": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed",
|
||||||
|
"help_text": "Type: `double`, example: `0.1`. (Bowtie 2 parameter) The maximum mismatch rate allowed. (Default: 0.1)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie2_k": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read",
|
||||||
|
"help_text": "Type: `integer`, example: `200`. (Bowtie 2 parameter) Find up to \u003cint\u003e alignments per read. (Default: 200)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"bowtie2_sensitivity_level": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode",
|
||||||
|
"help_text": "Type: `string`, example: `sensitive`, choices: ``very_fast`, `fast`, `sensitive`, `very_sensitive``. (Bowtie 2 parameter) Set Bowtie 2\u0027s preset options in --end-to-end mode. This option controls how \nhard Bowtie 2 tries to find alignments. \u003cstring\u003e must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\". The four candidates correspond to Bowtie 2\u0027s \"--very-fast\", \"--fast\", \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\" - use Bowtie 2\u0027s default)\n",
|
||||||
|
"enum": ["very_fast", "fast", "sensitive", "very_sensitive"]
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"star_gzipped_read_file": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by gzip. (Default: false)\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"star_bzipped_read_file": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Input read file(s) is compressed by bzip2. (Default: false)\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"star_output_genome_bam": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Save the BAM file from STAR alignment under genomic coordinate to \u0027sample_name.STAR.genome.bam\u0027. \nThis file is NOT sorted by genomic coordinate. In this file, according to STAR\u0027s manual, \u0027paired \nends of an alignment are always adjacent, and multiple alignments of a read are adjacent as well\u0027. \n(Default: false)\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"advanced options" : {
|
||||||
|
"title": "Advanced Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"tag": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments",
|
||||||
|
"help_text": "Type: `string`, example: ``. The name of the optional field used in the SAM input for identifying a read with too many valid \nalignments. The field should have the format \u003ctagName\u003e:i:\u003cvalue\u003e, where a \u003cvalue\u003e bigger than 0 \nindicates a read with too many alignments. (Default: \"\")\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fragment_length_min": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `1`. Minimum read/insert length allowed",
|
||||||
|
"help_text": "Type: `integer`, example: `1`. Minimum read/insert length allowed. This is also the value for the Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fragment_length_max": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `1000`. Maximum read/insert length allowed",
|
||||||
|
"help_text": "Type: `integer`, example: `1000`. Maximum read/insert length allowed. This is also the value for the Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"fragment_length_mean": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian",
|
||||||
|
"help_text": "Type: `integer`, example: `-1`. (single-end data only) The mean of the fragment length distribution, which is assumed to be a \nGaussian. (Default: -1, which disables use of the fragment length distribution)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"gragment_length_sd": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian",
|
||||||
|
"help_text": "Type: `double`, example: `0.0`. (single-end data only) The standard deviation of the fragment length distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes that all fragments are of the same length, \ngiven by the rounded value of --fragment_length_mean).\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"estimate_rspd": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Set this option if you want to estimate the read start position distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"num_rspd_bins": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `20`. Number of bins in the RSPD",
|
||||||
|
"help_text": "Type: `integer`, example: `20`. Number of bins in the RSPD. Only relevant when \u0027--estimate_rspd\u0027 is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"gibbs_burnin": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler",
|
||||||
|
"help_text": "Type: `integer`, example: `200`. The number of burn-in rounds for RSEM\u0027s Gibbs sampler. Each round passes over the entire data set \nonce. If RSEM can use multiple threads, multiple Gibbs samplers will start at the same time and all \nsamplers share the same burn-in number. (Default: 200)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"gibbs_number_of_samples": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers",
|
||||||
|
"help_text": "Type: `integer`, example: `1000`. The total number of count vectors RSEM will collect from its Gibbs samplers. (Default: 1000)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"gibbs_sampling_gap": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects",
|
||||||
|
"help_text": "Type: `integer`, example: `1`. The number of rounds between two succinct count vectors RSEM collects. If the count vector after \nround N is collected, the count vector after round N + \u003cint\u003e will also be collected. (Default: 1)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ci_credibility_level": {
|
||||||
|
"type":
|
||||||
|
"number",
|
||||||
|
"description": "Type: `double`, example: `0.95`. The credibility level for credibility intervals",
|
||||||
|
"help_text": "Type: `double`, example: `0.95`. The credibility level for credibility intervals. (Default: 0.95)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"ci_number_of_samples_per_count_vector": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector",
|
||||||
|
"help_text": "Type: `integer`, example: `50`. The number of read generating probability vectors sampled per sampled count vector. The credibility \nintervals are calculated by first sampling P(C | D) and then sampling P(Theta | C) for each sampled \ncount vector. This option controls how many Theta vectors are sampled per sampled count vector. \n(Default: 50)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"keep_intermediate_files": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Keep temporary files generated by RSEM. RSEM creates a temporary directory, \u0027sample_name.temp\u0027, \ninto which it puts all intermediate output files. If this directory already exists, RSEM overwrites \nall files generated by previous RSEM runs inside of it. By default, after RSEM finishes, the \ntemporary directory is deleted. Set this option to prevent the deletion of this directory and the \nintermediate files inside of it.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"temporary_folder": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM",
|
||||||
|
"help_text": "Type: `string`, example: `sample_name.temp`. Set where to put the temporary files generated by RSEM. If the folder specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"time": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Output time consumed by each step of RSEM to \u0027sample_name.time\u0027.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"prior-enhanced rsem options" : {
|
||||||
|
"title": "Prior-Enhanced RSEM Options",
|
||||||
|
"type": "object",
|
||||||
|
"description": "No description",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"run_pRSEM": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM)",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform\u0027s initial pseudo-count for \nRSEM\u0027s Gibbs sampling, will be learned from input RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq peak information to partition isoforms (e.g. in pRSEM\u0027s default \npartition model), either ChIP-seq peak file (with the \u0027--chipseq_peak_file\u0027 option) or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables are required (with the \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027, \u0027--chipseq_control_read_files \u003cstring\u003e\u0027, and \u0027--bowtie_path \n\u003cpath\u003e options), otherwise, ChIP-seq FASTQ files for target and control and the path to Bowtie \nexecutables are required.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"chipseq_peak_file": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format",
|
||||||
|
"help_text": "Type: `file`. Full path to a ChIP-seq peak file in ENCODE\u0027s narrowPeak, i.e. BED6+4, format. This file is used \nwhen running prior-enhanced RSEM in the default two-partition model. It partitions isoforms by \nwhether they have ChIP-seq overlapping with their transcription start site region or not. Each \npartition will have its own prior parameter learned from a training set. This file can be either \ngzipped or ungzipped.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"chipseq_target_read_files": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target",
|
||||||
|
"help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq target. This option is used when running \nprior-enhanced RSEM. It provides information to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 \nand \u0027--chipseq_control_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"chipseq_control_read_files": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq control",
|
||||||
|
"help_text": "Type: `file`. Comma-separated full path of FASTQ read file(s) for ChIP-seq control. This option is used when running \nprior-enhanced RSEM. It provides information to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. The options \u0027--bowtie_path \u003cpath\u003e\u0027 and \n\u0027--chipseq_target_read_files \u003cstring\u003e\u0027 must be defined when this option is specified.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"chipseq_read_files_multi_targets": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets",
|
||||||
|
"help_text": "Type: `file`. Comma-separated full path of FASTQ read files for multiple ChIP-seq targets. This option is used when \nrunning prior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides \ninformation to calculate ChIP-seq signals. All files can be either ungzipped or gzipped with a suffix \n\u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--bowtie_path \u003cpath\u003e\u0027 must be defined and \nthe option \u0027--partition_model \u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"chipseq_bed_files_multi_targets": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets",
|
||||||
|
"help_text": "Type: `file`. Comma-separated full path of BED files for multiple ChIP-seq targets. This option is used when running \nprior-enhanced RSEM, where prior is learned from multiple complementary data sets. It provides information \nof ChIP-seq signals and must have at least the first six BED columns. All files can be either ungzipped \nor gzipped with a suffix \u0027.gz\u0027 or \u0027.gzip\u0027. When this option is specified, the option \u0027--partition_model \n\u003cstring\u003e\u0027 will be set to \u0027cmb_lgt\u0027 automatically.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"cap_stacked_chipseq_reads": {
|
||||||
|
"type":
|
||||||
|
"boolean",
|
||||||
|
"description": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval",
|
||||||
|
"help_text": "Type: `boolean_true`, default: `false`. Keep a maximum number of ChIP-seq reads that aligned to the same genomic interval. This option is used \nwhen running prior-enhanced RSEM, where prior is learned from multiple complementary data sets. This \noption is only in use when either \u0027--chipseq_read_files_multi_targets \u003cstring\u003e\u0027 or \n\u0027--chipseq_bed_files_multi_targets \u003cstring\u003e\u0027 is specified.\n"
|
||||||
|
,
|
||||||
|
"default": "False"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"n_max_stacked_chipseq_reads": {
|
||||||
|
"type":
|
||||||
|
"integer",
|
||||||
|
"description": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep",
|
||||||
|
"help_text": "Type: `integer`. The maximum number of stacked ChIP-seq reads to keep. This option is used when running prior-enhanced \nRSEM, where prior is learned from multiple complementary data sets. This option is only in use when the \noption \u0027--cap_stacked_chipseq_reads\u0027 is set.\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"partition_model": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM",
|
||||||
|
"help_text": "Type: `string`, example: `pk`. A keyword to specify the partition model used by prior-enhanced RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3, lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above models are learned from a training set. For detailed explanations, please \nsee prior-enhanced RSEM\u0027s paper. (Default: \u0027pk\u0027)\n"
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
"nextflow input-output arguments" : {
|
||||||
|
"title": "Nextflow input-output arguments",
|
||||||
|
"type": "object",
|
||||||
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||||
|
"properties": {
|
||||||
|
|
||||||
|
|
||||||
|
"publish_dir": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||||
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
,
|
||||||
|
"param_list": {
|
||||||
|
"type":
|
||||||
|
"string",
|
||||||
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||||
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||||
|
"hidden": true
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"allOf": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/input"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/output"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/basic options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/aligner options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/advanced options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/prior-enhanced rsem options"
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"$ref": "#/definitions/nextflow input-output arguments"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user